From 6ff967aaa317073b43c8764386823191cdf8656c Mon Sep 17 00:00:00 2001 From: zhang Date: Sat, 6 Aug 2022 19:36:09 +0800 Subject: update --- nlp/gensim_demo/w2c.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'nlp/gensim_demo/w2c.py') diff --git a/nlp/gensim_demo/w2c.py b/nlp/gensim_demo/w2c.py index bf4ccb1..cf3cd92 100644 --- a/nlp/gensim_demo/w2c.py +++ b/nlp/gensim_demo/w2c.py @@ -6,13 +6,21 @@ from gensim.matutils import unitvec if __name__ == '__main__': + # 包含了句子以及分词的处理 # sentences = word2vec.Text8Corpus('text8') + # # sentences = list of list of words # model = Word2Vec(sentences, workers=cpu_count()//2) # model.save('text8.model') + model = Word2Vec.load('text8.model') + + # model.wv.vectors, model.wv.index2word # woman + king - man == ? + # woman + king - man == queen print(model.most_similar(positive=['woman', 'king'], negative=['man'], topn=2)) + + woman_vec = model.wv.word_vec('woman', use_norm=True) king_vec = model.wv.word_vec('king', use_norm=True) man_vec = model.wv.word_vec('man', use_norm=True) -- cgit v1.2.3