From 8727b9ff6fe0059905f8331f8409dd360bdd7a0b Mon Sep 17 00:00:00 2001 From: robert Date: Thu, 12 Jul 2018 11:01:44 +0200 Subject: [PATCH] owpreprocess: fix no stopword files on win --- orangecontrib/text/preprocess/filter.py | 22 +++++++++++++--------- orangecontrib/text/widgets/owpreprocess.py | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/orangecontrib/text/preprocess/filter.py b/orangecontrib/text/preprocess/filter.py index 55ba64321..f79ab09fa 100644 --- a/orangecontrib/text/preprocess/filter.py +++ b/orangecontrib/text/preprocess/filter.py @@ -61,20 +61,24 @@ def from_file(self, path): # No encoding worked, raise raise UnicodeError("Couldn't determine file encoding") -# get NLTK list of stopwords -stopwords_listdir = [] -try: - stopwords_listdir = [file for file in os.listdir(stopwords._get_root()) - if file.islower()] -except LookupError: # when no NLTK data is available - pass - class StopwordsFilter(BaseTokenFilter, WordListMixin): """ Remove tokens present in NLTK's language specific lists or a file. """ name = 'Stopwords' - supported_languages = [file.capitalize() for file in stopwords_listdir] + @staticmethod + @wait_nltk_data + def supported_languages(): + # get NLTK list of stopwords + stopwords_listdir = [] + try: + stopwords_listdir = [file for file in + os.listdir(stopwords._get_root()) + if file.islower()] + except LookupError: # when no NLTK data is available + pass + + return [file.capitalize() for file in stopwords_listdir] @wait_nltk_data def __init__(self, language='English', word_list=None): diff --git a/orangecontrib/text/widgets/owpreprocess.py b/orangecontrib/text/widgets/owpreprocess.py index 17164d837..1c9f5fdb6 100644 --- a/orangecontrib/text/widgets/owpreprocess.py +++ b/orangecontrib/text/widgets/owpreprocess.py @@ -331,7 +331,7 @@ def __init__(self, master): super().__init__(master) box = widgets.ComboBox(self, 'stopwords_language', - items=[None] + preprocess.StopwordsFilter.supported_languages) + items=[None] + preprocess.StopwordsFilter.supported_languages()) box.currentIndexChanged.connect(self.stopwords_changed) self.stopwords_changed() self.method_layout.addWidget(box, self.STOPWORDS, 1)