summary | refs | log | tree | commit | diff
path: root/nlp/gensim_demo
diff options
context:
space:
mode:
Diffstat (limited to 'nlp/gensim_demo')
-rw-r--r-- nlp/gensim_demo/w2c.py 8
1 files changed, 8 insertions, 0 deletions
diff --git a/nlp/gensim_demo/w2c.py b/nlp/gensim_demo/w2c.py
index bf4ccb1..cf3cd92 100644
--- a/nlp/gensim_demo/w2c.py
+++ b/nlp/gensim_demo/w2c.py
@@ -6,13 +6,21 @@ from gensim.matutils import unitvec
if __name__ == '__main__':
+ # 包含了句子以及分词的处理
# sentences = word2vec.Text8Corpus('text8')
+ # # sentences = list of list of words
# model = Word2Vec(sentences, workers=cpu_count()//2)
# model.save('text8.model')
+
model = Word2Vec.load('text8.model')
+
+ # model.wv.vectors, model.wv.index2word
# woman + king - man == ?
+ # woman + king - man == queen
print(model.most_similar(positive=['woman', 'king'], negative=['man'], topn=2))
+
+
woman_vec = model.wv.word_vec('woman', use_norm=True)
king_vec = model.wv.word_vec('king', use_norm=True)
man_vec = model.wv.word_vec('man', use_norm=True)