From 22a537f3f63e8d0698f50c7e0a16677d51f27989 Mon Sep 17 00:00:00 2001 From: jerome Date: Wed, 14 Nov 2018 14:30:06 +0800 Subject: [PATCH 1/2] Increase the characteristics of the specified language during the run --- langid/__init__.py | 2 +- langid/langid.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/langid/__init__.py b/langid/__init__.py index 2c0a7a5c..0a09e897 100644 --- a/langid/__init__.py +++ b/langid/__init__.py @@ -1 +1 @@ -from .langid import classify, rank, set_languages +from .langid import classify, rank, set_languages, classify_with_language diff --git a/langid/langid.py b/langid/langid.py index 5c44aab7..9a45e24a 100755 --- a/langid/langid.py +++ b/langid/langid.py @@ -106,6 +106,24 @@ def classify(instance): return identifier.classify(instance) + +def classify_with_language(instance, langs): + """ + Convenience method using a global identifier instance with the default + model included in langid.py. Identifies the language that a string is + written in. + + @param instance a text string. Unicode strings will automatically be utf8-encoded + @param langs Increase the characteristics of the specified language during the run + @returns a tuple of the most likely language and the confidence score + """ + global identifier + if identifier is None: + load_model() + + return identifier.classify_with_language(instance, langs) + + def rank(instance): """ Convenience method using a global identifier instance with the default @@ -288,6 +306,11 @@ def nb_classprobs(self, fv): pd = pdc + self.nb_pc return pd + def classify_with_language(self, text, langs): + self.set_languages(langs=langs) + + self.classify(text) + def classify(self, text): """ Classify an instance. From 7ee577c8bdbdb9b541c8ad453d8eb5ff1a2190fe Mon Sep 17 00:00:00 2001 From: jerome Date: Wed, 14 Nov 2018 15:44:59 +0800 Subject: [PATCH 2/2] fix bug. 
missing return in classify_with_language --- langid/langid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langid/langid.py b/langid/langid.py index 9a45e24a..6eb4b5b5 100755 --- a/langid/langid.py +++ b/langid/langid.py @@ -309,7 +309,7 @@ def nb_classprobs(self, fv): def classify_with_language(self, text, langs): self.set_languages(langs=langs) - self.classify(text) + return self.classify(text) def classify(self, text): """