Skip to content

Commit

Permalink
Add types and use joined string literals for keys
Browse files (browse the repository at this point in the history)
  • Loading branch information
lionel-rowe committed Apr 28, 2023
1 parent 15ed36c commit 4de7282
Show file tree
Hide file tree
Showing 8 changed files with 236 additions and 50,515 deletions.
4 changes: 2 additions & 2 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
vocab.bpe
encoder.json
vocab-*.bpe
token-mapping-*.json
232 changes: 0 additions & 232 deletions codec.js

This file was deleted.

33 changes: 16 additions & 17 deletions codec.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import tokenMapping from './token-mapping-gpt3.json' assert { type: 'json' }
import { encode, decode } from './codec.js'
import { encode, decode } from './codec.ts'
import { assertEquals } from 'https://deno.land/[email protected]/testing/asserts.ts'
const bpe = await (await fetch(import.meta.resolve('./vocab-gpt3.bpe'))).text()

const options = { tokenMapping, bpe }

Deno.test('docs', async (t) => {
const docFiles = ['./README.md']

@@ -26,43 +24,44 @@ Deno.test('docs', async (t) => {

Deno.test('empty string', () => {
const str = ''
assertEquals(encode(str, options), [])
assertEquals(decode(encode(str, options), options), str)
assertEquals(encode(str, { tokenMapping, bpe }), [])
assertEquals(decode(encode(str, { tokenMapping, bpe }), { tokenMapping }), str)
})

Deno.test('space', () => {
const str = ' '
assertEquals(encode(str, options), [220])
assertEquals(decode(encode(str, options), options), str)
assertEquals(encode(str, { tokenMapping, bpe }), [220])
assertEquals(decode(encode(str, { tokenMapping, bpe }), { tokenMapping }), str)
})

Deno.test('tab', () => {
const str = '\t'
assertEquals(encode(str, options), [197])
assertEquals(decode(encode(str, options), options), str)
assertEquals(encode(str, { tokenMapping, bpe }), [197])
assertEquals(decode(encode(str, { tokenMapping, bpe }), { tokenMapping }), str)
})

Deno.test('simple text', () => {
const str = 'This is some text'
assertEquals(encode(str, options), [1212, 318, 617, 2420])
assertEquals(decode(encode(str, options), options), str)
assertEquals(encode(str, { tokenMapping, bpe }), [1212, 318, 617, 2420])
assertEquals(decode(encode(str, { tokenMapping, bpe }), { tokenMapping }), str)
})

Deno.test('multi-token word', () => {
const str = 'indivisible'
assertEquals(encode(str, options), [521, 452, 12843])
assertEquals(decode(encode(str, options), options), str)
assertEquals(encode(str, { tokenMapping, bpe }), [521, 452, 12843])
assertEquals(decode(encode(str, { tokenMapping, bpe }), { tokenMapping }), str)
})

Deno.test('emojis', () => {
const str = 'hello 👋 world 🌍'
assertEquals(encode(str, options), [31373, 50169, 233, 995, 12520, 234, 235])
assertEquals(decode(encode(str, options), options), str)

assertEquals(encode(str, { tokenMapping, bpe }), [31373, 50169, 233, 995, 12520, 234, 235])
assertEquals(decode(encode(str, { tokenMapping, bpe }), { tokenMapping }), str)
})

Deno.test('properties of Object', () => {
const str = 'toString constructor hasOwnProperty valueOf'

assertEquals(encode(str, options), [1462, 10100, 23772, 468, 23858, 21746, 1988, 5189])
assertEquals(decode(encode(str, options), options), str)
assertEquals(encode(str, { tokenMapping, bpe }), [1462, 10100, 23772, 468, 23858, 21746, 1988, 5189])
assertEquals(decode(encode(str, { tokenMapping, bpe }), { tokenMapping }), str)
})
Loading

0 comments on commit 4de7282

Please sign in to comment.