Skip to content

Commit

Permalink
Merge pull request #838 from PrimozGodec/fix-normalization
Browse files Browse the repository at this point in the history
[FIX] Normalize - fix unpickling for Normalizers before caching was implemented
  • Loading branch information
ajdapretnar authored Jun 3, 2022
2 parents d9ac818 + 80c0137 commit b196b6b
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 1 deletion.
8 changes: 7 additions & 1 deletion orangecontrib/text/preprocess/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ def __getstate__(self):
d["_normalization_cache"] = {}
return d

def __setstate__(self, state):
    """Restore instance attributes from ``state`` and reset the cache.

    Pickles written before caching was implemented carry no
    ``_normalization_cache`` entry, so the attribute is always set to an
    empty dict once the pickled state has been applied.
    """
    attributes = vars(self)
    attributes.update(state)
    attributes["_normalization_cache"] = {}


class WordNetLemmatizer(BaseNormalizer):
name = 'WordNet Lemmatizer'
Expand Down Expand Up @@ -201,7 +207,7 @@ def __setstate__(self, state):
Note: __model will be loaded on __call__
"""
self.__dict__.update(state)
super().__setstate__(state)
self.models = UDPipeModels()


Expand Down
Binary file added orangecontrib/text/tests/normalizer-v1.pkl
Binary file not shown.
10 changes: 10 additions & 0 deletions orangecontrib/text/tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,16 @@ def test_cache(self):
loaded_normalizer = pickle.loads(pickle.dumps(normalizer))
self.assertEqual(0, len(loaded_normalizer._normalization_cache))

def test_nocache_normalizer_restorable(self):
    """
    A normalizer pickled before the cache was added to normalizers must
    unpickle and still be callable on a corpus.
    """
    # The fixture pickle is stored next to this test module.
    pickle_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "normalizer-v1.pkl"
    )
    with open(pickle_path, "rb") as f:
        loaded_normalizer = pickle.load(f)
    # Calling the normalizer is the check: it must not raise.
    loaded_normalizer(self.corpus)


class UDPipeModelsTests(unittest.TestCase):
def test_label_transform(self):
Expand Down

0 comments on commit b196b6b

Please sign in to comment.