summaryrefslogtreecommitdiff
path: root/data/tokenizer_specs.json
diff options
context:
space:
mode:
Diffstat (limited to 'data/tokenizer_specs.json')
-rw-r--r--data/tokenizer_specs.json9
1 files changed, 9 insertions, 0 deletions
diff --git a/data/tokenizer_specs.json b/data/tokenizer_specs.json
new file mode 100644
index 0000000..d7ad1ca
--- /dev/null
+++ b/data/tokenizer_specs.json
@@ -0,0 +1,9 @@
+{
+ "tokenizers": [
+ {
+ "name": "sp_bpe_1024",
+ "dataset_suffix": "sp1024",
+ "vocab_size": 1024
+ }
+ ]
+}