# Provenance: learn_torch/text_transformer.py, added as a new file in commit
# 2180c68999eb8dc0c7bcec015b2703f5b8b20223 (author: zhang,
# Wed, 4 May 2022 08:47:54 +0800, subject: "ndarray axis").
"""Build a word-level vocabulary from the WikiText2 training split.

Module-level names produced:
    train_iter -- the WikiText2 'train' split.
    tokenizer  -- torchtext's 'basic_english' tokenizer.
    vocab      -- vocabulary built from the tokenized training text, with
                  UNK_TOKEN registered as a special token and used as the
                  default index.
"""
from torchtext.datasets import WikiText2
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Token that stands in for any word not seen while building the vocabulary.
# The scraped patch showed an empty string here; the angle-bracketed token
# text was evidently stripped, so it is restored to the conventional '<unk>'.
UNK_TOKEN = '<unk>'

train_iter = WikiText2(split='train')
tokenizer = get_tokenizer('basic_english')

# NOTE(review): train_iter is iterated here to build the vocabulary. If the
# installed torchtext returns a single-pass iterator for WikiText2, re-create
# it before iterating the training text again -- confirm against the version
# in use.
vocab = build_vocab_from_iterator(
    map(tokenizer, train_iter),
    specials=[UNK_TOKEN],
)

# Out-of-vocabulary lookups resolve to the UNK_TOKEN index.
vocab.set_default_index(vocab[UNK_TOKEN])