Skip to content

Commit

Permalink
Bag of Words: store tokens if internally processed
Browse files: browse the repository at this point in the history
  • Loading branch information
ajdapretnar committed Jul 9, 2024
1 parent 101f30e commit 02f6c3c
Showing 1 changed file with 5 additions and 1 deletion.
6 changes (5 additions & 1 deletion) in orangecontrib/text/vectorization/bagofwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ def _transform(self, corpus, source_dict=None, callback=dummy_callback):
if len(corpus) == 0:
return corpus
temp_corpus = list(corpus.ngrams_iterator(' ', include_postags=True))
dic = corpora.Dictionary(temp_corpus, prune_at=None) if not source_dict else source_dict
if not source_dict:
corpus.store_tokens(temp_corpus)
dic = corpora.Dictionary(temp_corpus, prune_at=None)
else:
dic = source_dict
if len(dic) == 0:
return corpus
callback(0.3)
Expand Down

0 comments on commit 02f6c3c

Please sign in to comment.