diff options
| author | zhang <zch921005@126.com> | 2022-08-06 19:36:09 +0800 |
|---|---|---|
| committer | zhang <zch921005@126.com> | 2022-08-06 19:36:09 +0800 |
| commit | 6ff967aaa317073b43c8764386823191cdf8656c (patch) | |
| tree | 8832d2fb5d202f4fd09c5f88c8402f6746af3573 /nlp/gensim_demo | |
| parent | fd4e40ae2ae58c06226cc9eb4c2ae9bdcfb677fd (diff) | |
update
Diffstat (limited to 'nlp/gensim_demo')
| -rw-r--r-- | nlp/gensim_demo/w2c.py | 8 |
1 file changed, 8 insertions, 0 deletions
diff --git a/nlp/gensim_demo/w2c.py b/nlp/gensim_demo/w2c.py index bf4ccb1..cf3cd92 100644 --- a/nlp/gensim_demo/w2c.py +++ b/nlp/gensim_demo/w2c.py @@ -6,13 +6,21 @@ from gensim.matutils import unitvec if __name__ == '__main__': + # 包含了句子以及分词的处理 # sentences = word2vec.Text8Corpus('text8') + # # sentences = list of list of words # model = Word2Vec(sentences, workers=cpu_count()//2) # model.save('text8.model') + model = Word2Vec.load('text8.model') + + # model.wv.vectors, model.wv.index2word # woman + king - man == ? + # woman + king - man == queen print(model.most_similar(positive=['woman', 'king'], negative=['man'], topn=2)) + + woman_vec = model.wv.word_vec('woman', use_norm=True) king_vec = model.wv.word_vec('king', use_norm=True) man_vec = model.wv.word_vec('man', use_norm=True) |
