From 001722a6fc43d6ce73e9df003eb8e582f8abbc47 Mon Sep 17 00:00:00 2001 From: Primoz Godec Date: Wed, 27 Apr 2022 12:05:12 +0200 Subject: [PATCH] Database: Pivoting table Normalize: fix unpickling for Normalizers before caching was implemented --- MANIFEST.in | 2 +- orangecontrib/text/preprocess/normalize.py | 8 +++++++- orangecontrib/text/tests/normalizer-v1.pkl | Bin 0 -> 189 bytes orangecontrib/text/tests/test_preprocess.py | 10 ++++++++++ 4 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 orangecontrib/text/tests/normalizer-v1.pkl diff --git a/MANIFEST.in b/MANIFEST.in index dd98d4aa3..44485ac82 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ recursive-include orangecontrib/text/datasets *.tab *.txt recursive-include orangecontrib/text/models *.ftz recursive-include orangecontrib/text/sentiment *.txt -recursive-include orangecontrib/text/tests *.txt *.json +recursive-include orangecontrib/text/tests *.txt *.json *.pkl recursive-include orangecontrib/text/tutorials *.ows recursive-include orangecontrib/text/widgets/icons *.svg *.png *.ai recursive-include orangecontrib/text/widgets/resources *.js *.css *.html diff --git a/orangecontrib/text/preprocess/normalize.py b/orangecontrib/text/preprocess/normalize.py index 7a5df3047..735666714 100644 --- a/orangecontrib/text/preprocess/normalize.py +++ b/orangecontrib/text/preprocess/normalize.py @@ -48,6 +48,12 @@ def __getstate__(self): d["_normalization_cache"] = {} return d + def __setstate__(self, state): + self.__dict__.update(state) + # support old pickles (before caching was implemented) that are missing + # _normalization_cache + self._normalization_cache = {} + class WordNetLemmatizer(BaseNormalizer): name = 'WordNet Lemmatizer' @@ -201,7 +207,7 @@ def __setstate__(self, state): Note: __model will be loaded on __call__ """ - self.__dict__.update(state) + super().__setstate__(state) self.models = UDPipeModels() diff --git a/orangecontrib/text/tests/normalizer-v1.pkl b/orangecontrib/text/tests/normalizer-v1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..34cf36593813e8f30d28760a74aeb96276a8a3eb GIT binary patch literal 189 zcmZo*nYxJq0(#W*ixTtFQWEQ0Qq~_)(mH