From a15093adad328a650d421e53c078cbd2c45beb0e Mon Sep 17 00:00:00 2001 From: Will DePue Date: Wed, 18 Mar 2026 09:32:01 -0700 Subject: Launch snapshot --- data/tokenizer_specs.json | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 data/tokenizer_specs.json (limited to 'data/tokenizer_specs.json') diff --git a/data/tokenizer_specs.json b/data/tokenizer_specs.json new file mode 100644 index 0000000..d7ad1ca --- /dev/null +++ b/data/tokenizer_specs.json @@ -0,0 +1,9 @@ +{ + "tokenizers": [ + { + "name": "sp_bpe_1024", + "dataset_suffix": "sp1024", + "vocab_size": 1024 + } + ] +} -- cgit v1.2.3