From f90b2a0d4d81a4d8144ff418ef8f8f1453e07729 Mon Sep 17 00:00:00 2001 From: Gustavo Rosa Date: Wed, 19 Jan 2022 12:55:21 -0300 Subject: [PATCH] chore(nalp): Changes support for gensim v4+. --- nalp/encoders/word2vec.py | 7 +++---- requirements.txt | 2 +- setup.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/nalp/encoders/word2vec.py b/nalp/encoders/word2vec.py index ca32d37..0c2e4c0 100644 --- a/nalp/encoders/word2vec.py +++ b/nalp/encoders/word2vec.py @@ -46,8 +46,8 @@ def learn(self, tokens, max_features=128, window_size=5, min_count=1, """ - self.encoder = W2V(sentences=[tokens], size=max_features, window=window_size, min_count=min_count, - sg=algorithm, alpha=learning_rate, iter=iterations, + self.encoder = W2V(sentences=[tokens], vector_size=max_features, window=window_size, min_count=min_count, + sg=algorithm, alpha=learning_rate, epochs=iterations, workers=multiprocessing.cpu_count()) def encode(self, tokens): @@ -94,7 +94,6 @@ def decode(self, encoded_tokens): raise RuntimeError(e) - decoded_tokens = [self.encoder.wv.most_similar( - positive=[t])[0][0] for t in encoded_tokens] + decoded_tokens = [self.encoder.wv.most_similar(positive=[t])[0][0] for t in encoded_tokens] return decoded_tokens diff --git a/requirements.txt b/requirements.txt index 8d7744f..71e62e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ coverage>=5.5 -gensim>=3.8.3 +gensim>=4.1.2 matplotlib>=3.3.4 mido>=1.2.9 nltk>=3.5 diff --git a/setup.py b/setup.py index c77e4ad..49113ad 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ url='https://github.com/gugarosa/nalp', license='Apache 2.0', install_requires=['coverage>=5.5', - 'gensim>=3.8.3', + 'gensim>=4.1.2', 'matplotlib>=3.3.4', 'mido>=1.2.9', 'nltk>=3.5',