Commit 44a3478
doc: fix type hints
MariellaCC committed Apr 9, 2024
1 parent 4326e20 commit 44a3478
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions src/kiara_plugin/topic_modelling/modules/pre_process.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
-from typing import Optional, List
+from typing import List, Optional
 from kiara.api import KiaraModule
 from kiara.exceptions import KiaraProcessingException
-from pyarrow import Table as PyArrowTable
 
 
 class TokenizeCorpus(KiaraModule):
@@ -54,12 +53,12 @@ def process(self, inputs, outputs):
 
         import nltk  # type: ignore
         import pyarrow as pa  # type: ignore
-        from nltk.tokenize.simple import CharTokenizer
+        from nltk.tokenize.simple import CharTokenizer  # type: ignore
 
         nltk.download("punkt")
 
-        tokenized_list: Optional[List[str]] = None
-        table_pa: Optional[PyArrowTable] = None
+        tokenized_list = None
+        table_pa = None
 
         # check that both inputs table and array are not set simultaneously
         if inputs.get_value_obj("corpus_table").is_set and inputs.get_value_obj("corpus_array").is_set:
@@ -91,7 +90,7 @@ def process(self, inputs, outputs):
 
         corpus_list = corpus_array_pa.to_pylist()
 
-        def tokenize(text: str, tokenize_by_character:bool = False) -> Optional[List[str]]:
+        def tokenize(text: str, tokenize_by_character:bool = False):
             if not tokenize_by_character:
                 try:
                     return nltk.word_tokenize(str(text))
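For readers outside the repository, the following is a minimal, self-contained sketch of what the tokenize helper touched by this commit does after the annotation was dropped. It is not the module's actual code: the exception handling and the character-tokenization branch are assumptions, since the diff above is truncated right after the nltk.word_tokenize call.

import nltk

nltk.download("punkt")  # word_tokenize needs the punkt model, as in the module above

def tokenize(text: str, tokenize_by_character: bool = False):
    # default path: NLTK word tokenization, mirroring the diff above
    if not tokenize_by_character:
        try:
            return nltk.word_tokenize(str(text))
        except Exception:
            # assumption: tokenization failures yield None rather than raising
            return None
    # assumption: character mode splits the text into single-character tokens
    return list(str(text))

print(tokenize("Topic modelling with kiara."))
# ['Topic', 'modelling', 'with', 'kiara', '.']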
