summaryrefslogtreecommitdiff
path: root/data/tokenizer_specs.json
diff options
context:
space:
mode:
authorWill DePue <williamd@openai.com>2026-03-18 09:32:01 -0700
committerWill DePue <williamd@openai.com>2026-03-18 09:32:01 -0700
commita15093adad328a650d421e53c078cbd2c45beb0e (patch)
treee054c4bde12b89e6d3b39d611d9caadabc7f7234 /data/tokenizer_specs.json
Launch snapshot
Diffstat (limited to 'data/tokenizer_specs.json')
-rw-r--r--data/tokenizer_specs.json9
1 files changed, 9 insertions, 0 deletions
diff --git a/data/tokenizer_specs.json b/data/tokenizer_specs.json
new file mode 100644
index 0000000..d7ad1ca
--- /dev/null
+++ b/data/tokenizer_specs.json
@@ -0,0 +1,9 @@
+{
+ "tokenizers": [
+ {
+ "name": "sp_bpe_1024",
+ "dataset_suffix": "sp1024",
+ "vocab_size": 1024
+ }
+ ]
+}