From 3ab853d8ebdb8d8f1a31d6ef2150c304d3061ff3 Mon Sep 17 00:00:00 2001 From: Richard Jackson Date: Wed, 18 Sep 2024 09:39:48 +0100 Subject: [PATCH] bunch of nit picks on tokenized_word_processor.py Co-authored-by: Szymon Palucha --- kazu/steps/ner/tokenized_word_processor.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kazu/steps/ner/tokenized_word_processor.py b/kazu/steps/ner/tokenized_word_processor.py index d308166a..82034427 100644 --- a/kazu/steps/ner/tokenized_word_processor.py +++ b/kazu/steps/ner/tokenized_word_processor.py @@ -43,7 +43,7 @@ class TokWordSpan: class SpanFinder(ABC): - def __init__(self, text: str, id2label: dict[int, str]): + def __init__(self, text: str, id2label: dict[int, str]) -> None: """ :param text: the raw text to be processed @@ -163,8 +163,7 @@ def span_continue_condition( classes_set = set(x[1] for x in bio_and_class_labels) if None in classes_set or self.text[word.word_char_start - 1] in self.span_breaking_chars: return False - else: - return True + return True def get_bio_and_class_labels(self, word: TokenizedWord) -> set[tuple[str, Optional[str]]]: """Return a set of tuple[,Optional[]] for a @@ -294,8 +293,7 @@ def span_continue_condition(self, word: TokenizedWord, class_labels: set[str]) - """ if not class_labels or self.text[word.word_char_start - 1] in self.span_breaking_chars: return False - else: - return True + return True def process_next_word(self, word: TokenizedWord) -> None: """Process the next word in the sequence, updating span information accordingly. @@ -362,8 +360,7 @@ def _make_multilabel_span_finder(self, text: str) -> MultilabelSpanFinder: def make_span_finder(self, text: str) -> SpanFinder: if self.use_multilabel: return self._make_multilabel_span_finder(text) - else: - return self._make_simple_span_finder(text) + return self._make_simple_span_finder(text) def __call__(self, words: list[TokenizedWord], text: str, namespace: str) -> list[Entity]: span_finder: SpanFinder = self.make_span_finder(text)