add Language scanner:

- use it instead of the global itr counter; it allows scanning only a certain amount of languages - add PartialLanguageScanner as a class that takes a config and only scans a few languages (see README.md for an example config file) - add FullLanguageScanner as an option to scan everything
Totto16 · Aug 14, 2023 · 00f23a7 · 00f23a7
1 parent 43aed17
commit 00f23a7
Show file tree

Hide file tree

Showing 8 changed files with 181 additions and 45 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,4 @@ model/
 /.vscode
 /.coverage
 /coverage.xml
+/config.ini
diff --git a/README.md b/README.md
@@ -6,3 +6,12 @@ VERSION 2: Breaking changes (how the data ist stored) !
 ## Customize
 
 Folder, title etc.
+
+
+## config.ini example
+
+```ini
+[settings]
+scan_amount= 100
+start_position = 0
+```
diff --git a/src/content/base_class.py b/src/content/base_class.py
@@ -1,4 +1,5 @@
 from dataclasses import dataclass, field
+from enum import Enum
 from os import listdir
 from pathlib import Path
 from typing import Any, Optional, Self, TypedDict
@@ -10,6 +11,7 @@
 from content.general import (
     Callback,
     ContentType,
+    EpisodeDescription,
     MissingOverrideError,
     ScannedFile,
     ScannedFileType,
@@ -25,8 +27,21 @@ class ContentDict(TypedDict):
     scanned_file: ScannedFile
 
 
-# TODO
-CallbackTuple = tuple[Manager, Classifier]
+class ScanType(Enum):
+    """Situation in which a language scanner is asked whether to scan."""
+
+    # passed to should_scan() when content is scanned for the first time
+    first_scan = "first_scan"
+    # passed to should_scan() when already known content is scanned again
+    rescan = "rescan"
+
+
+class LanguageScanner:
+    """Base class that decides whether the language of an episode is scanned.
+
+    Subclasses have to override `should_scan`; the implementation here only
+    raises.
+    """
+
+    def should_scan(
+        self: Self,
+        description: EpisodeDescription,  # noqa: ARG002
+        scan_type: ScanType,  # noqa: ARG002
+    ) -> bool:
+        """Return whether the language should be detected for this episode.
+
+        Args:
+            description: Description of the episode that is being scanned.
+            scan_type: Whether this is a first scan or a rescan.
+
+        Raises:
+            MissingOverrideError: always, since this base method is not
+                meant to be called directly.
+        """
+        raise MissingOverrideError
+
+
+# Values a callback hands out via get_saved(): (manager, classifier, language scanner)
+CallbackTuple = tuple[Manager, Classifier, LanguageScanner]
 
 
 @dataclass(slots=True, repr=True)
@@ -55,7 +70,9 @@ def generate_checksum(self: Self, manager: Manager) -> None:
     def scan(
         self: Self,
         callback: Callback[  # noqa: ARG002
-            "Content", ContentCharacteristic, CallbackTuple,
+            "Content",
+            ContentCharacteristic,
+            CallbackTuple,
         ],
         *,
         parent_folders: list[str],  # noqa: ARG002
@@ -97,7 +114,9 @@ def process_folder(
         results: list[Content] = []
         for file_path, file_type, parent_folders in temp:
             result: Optional[Content] = callback.process(
-                file_path, file_type, parent_folders,
+                file_path,
+                file_type,
+                parent_folders,
             )
             value = (
                 result.type if result is not None else None,

diff --git a/src/content/episode_content.py b/src/content/episode_content.py
@@ -1,5 +1,4 @@
 from dataclasses import dataclass, field
-from enum import Enum
 from pathlib import Path
 from typing import (
     Literal,
@@ -17,6 +16,7 @@
     Content,
     ContentCharacteristic,
     ContentDict,
+    ScanType,
 )
 from content.general import (
     Callback,
@@ -34,26 +34,6 @@ class EpisodeContentDict(ContentDict):
     language: Language
 
 
-# TODO: remove
-GLOBAL_ITER_MAX: int = 200
-SKIP_ITR: int = 530
-itr: int = 0
-
-
-# TODO: remove
-def itr_print_percent() -> None:
-    global itr  # noqa: PLW0602
-    if itr < SKIP_ITR:
-        return
-
-    if itr >= GLOBAL_ITER_MAX + SKIP_ITR:
-        return
-
-    percent: float = (itr - SKIP_ITR) / GLOBAL_ITER_MAX * 100.0
-
-    print(f"{percent:.02f} %")
-
-
 @schema(extra=narrow_type(("type", Literal[ContentType.episode])))
 @dataclass(slots=True, repr=True)
 class EpisodeContent(Content):
@@ -145,7 +125,7 @@ def scan(
         parent_folders: list[str],
         rescan: bool = False,
     ) -> None:
-        manager, classifier = callback.get_saved()
+        manager, classifier, scanner = callback.get_saved()
 
         characteristic: ContentCharacteristic = (self.type, self.scanned_file.type)
 
@@ -161,13 +141,8 @@ def scan(
                         characteristic,
                     )
 
-                    # TODO: remove
-                    global itr  # noqa: PLW0603
-                    if itr < GLOBAL_ITER_MAX + SKIP_ITR:
-                        itr_print_percent()
-                        itr = itr + 1
-                        if itr >= SKIP_ITR:
-                            self.__language = self.__get_language(classifier, manager)
+                    if scanner.should_scan(self.__description, ScanType.rescan):
+                        self.__language = self.__get_language(classifier, manager)
 
                     callback.progress(
                         self.scanned_file.path.name,
@@ -197,12 +172,8 @@ def scan(
             characteristic,
         )
 
-        # TODO: re-enable
-        if itr < GLOBAL_ITER_MAX + SKIP_ITR:
-            itr_print_percent()
-            itr = itr + 1
-            if itr >= SKIP_ITR:
-                self.__language = self.__get_language(classifier, manager)
+        if scanner.should_scan(self.__description, ScanType.first_scan):
+            self.__language = self.__get_language(classifier, manager)
 
         callback.progress(
             self.scanned_file.path.name,

diff --git a/src/content/general.py b/src/content/general.py
@@ -22,7 +22,7 @@
 from enlighten import Manager
 
 
-class ScannedFileType(str, Enum):
+class ScannedFileType(Enum):
     file = "file"
     folder = "folder"
 

diff --git a/src/content/scanner.py b/src/content/scanner.py
@@ -0,0 +1,130 @@
+import json
+from configparser import ConfigParser
+from pathlib import Path
+from typing import Optional, Self, TypedDict
+
+from helper.timestamp import parse_int_safely
+from typing_extensions import override
+
+from content.base_class import LanguageScanner, ScanType
+from content.general import EpisodeDescription
+
+
+class StaticLanguageScanner(LanguageScanner):
+    """Language scanner whose answer is fixed when it is constructed."""
+
+    __value: bool
+
+    def __init__(self: Self, *, value: bool) -> None:
+        """Remember the constant answer that `should_scan` gives.
+
+        Args:
+            value: The result every `should_scan` call returns.
+        """
+        super().__init__()
+        self.__value = value
+
+    @override
+    def should_scan(
+        self: Self,
+        description: EpisodeDescription,
+        scan_type: ScanType,
+    ) -> bool:
+        """Return the stored answer; both arguments are ignored."""
+        return self.__value
+
+
+class FullLanguageScanner(StaticLanguageScanner):
+    """Scanner that answers every `should_scan` call with True."""
+
+    def __init__(self: Self) -> None:
+        super().__init__(value=True)
+
+
+class NoLanguageScanner(StaticLanguageScanner):
+    """Scanner that answers every `should_scan` call with False."""
+
+    def __init__(self: Self) -> None:
+        # Has to be False: with True this class would behave exactly like
+        # FullLanguageScanner and scan everything.
+        super().__init__(value=False)
+
+
+class PartialScannerDict(TypedDict, total=False):
+    """Partial scanner settings as loaded from a config file.
+
+    Declared with total=False, so either key may be missing.
+    """
+
+    start_position: int
+    scan_amount: int
+
+
+class PartialScannerDictTotal(TypedDict, total=True):
+    """Partial scanner settings with both keys required (type of the defaults)."""
+
+    start_position: int
+    scan_amount: int
+
+
+# Name of the INI section from which the partial scanner reads its settings
+INI_SETTINGS_SECTION_KEY = "settings"
+
+
+class PartialLanguageScanner(LanguageScanner):
+    """Language scanner that only scans a window of the `should_scan` calls.
+
+    Calls are counted starting at 0. A call is answered with True when its
+    index lies in `[start_position, start_position + scan_amount)`. Both
+    values are read from a JSON or INI config file; missing values fall back
+    to the defaults (`start_position` 0, `scan_amount` 100).
+    """
+
+    __start_position: int
+    __scan_amount: int
+    __current_position: int
+
+    @property
+    def __defaults(self: Self) -> PartialScannerDictTotal:
+        """Values used for every setting the config file does not provide."""
+        return {"start_position": 0, "scan_amount": 100}
+
+    def __init__(self: Self, config_file: Path = Path("./config.ini")) -> None:
+        """Load the scan window from `config_file`.
+
+        Args:
+            config_file: Path to a `.json` or `.ini` file. A missing file is
+                not an error; the defaults are used instead.
+
+        Raises:
+            RuntimeError: if the file exists but has an unsupported suffix.
+        """
+        super().__init__()
+        loaded_dict: Optional[PartialScannerDict] = None
+
+        if config_file.exists():
+            with config_file.open(mode="r") as file:
+                suffix: str = config_file.suffix[1:]
+                match suffix:
+                    case "json":
+                        # NOTE(review): the JSON content is not validated; it
+                        # is assumed to be an object with optional int keys.
+                        loaded_dict = json.load(file)
+                    case "ini":
+                        config = ConfigParser()
+                        # Parse from the handle that is already open instead
+                        # of opening the file a second time by path.
+                        config.read_file(file)
+                        if INI_SETTINGS_SECTION_KEY in config:
+                            temp_dict = dict(config.items(INI_SETTINGS_SECTION_KEY))
+
+                            loaded_dict = {}
+                            # Values that are missing or not parseable as int
+                            # are skipped, so the defaults apply to them.
+                            if temp_dict.get("start_position", None) is not None:
+                                int_result = parse_int_safely(
+                                    temp_dict["start_position"],
+                                )
+                                if int_result is not None:
+                                    loaded_dict["start_position"] = int_result
+                            if temp_dict.get("scan_amount", None) is not None:
+                                int_result = parse_int_safely(
+                                    temp_dict["scan_amount"],
+                                )
+                                if int_result is not None:
+                                    loaded_dict["scan_amount"] = int_result
+
+                    case _:
+                        msg = f"Config not loadable from '{suffix}' file!"
+                        raise RuntimeError(msg)
+
+        settings: PartialScannerDict = loaded_dict if loaded_dict is not None else {}
+        self.__start_position = settings.get(
+            "start_position",
+            self.__defaults["start_position"],
+        )
+        self.__scan_amount = settings.get(
+            "scan_amount",
+            self.__defaults["scan_amount"],
+        )
+
+        self.__current_position = 0
+
+    @override
+    def should_scan(
+        self: Self,
+        description: EpisodeDescription,
+        scan_type: ScanType,
+    ) -> bool:
+        """Return True while the call counter is inside the configured window.
+
+        Every call advances the counter by exactly one, whether or not the
+        call falls inside the window. Both arguments are ignored.
+        """
+        position = self.__current_position
+        self.__current_position += 1
+
+        window_end = self.__start_position + self.__scan_amount
+        return self.__start_position <= position < window_end
+
+
+# TODO add time based scanner
+# TODO optionally ask user for input each x steps as option for other scanners
diff --git a/src/entry.py b/src/entry.py
@@ -10,6 +10,7 @@
 from classifier import Language
 from content.base_class import Content  # noqa: TCH002
 from content.general import NameParser, Summary
+from content.scanner import PartialLanguageScanner
 from helper.timestamp import parse_int_safely
 from main import AllContent, generate_json_schema, parse_contents
 from typing_extensions import override
@@ -105,7 +106,7 @@ def main() -> None:
         },
         Path("data/data.json"),
         name_parser=CustomNameParser(SPECIAL_NAMES),
-        # TODO: make it possible to define to do a full scan or not (full= with language detection, not full = without)
+        scanner=PartialLanguageScanner(Path("./config.ini")),
     )
 
     summaries = [content.summary() for content in contents]

diff --git a/src/main.py b/src/main.py
@@ -8,6 +8,7 @@
     CallbackTuple,
     Content,
     ContentCharacteristic,
+    LanguageScanner,
     process_folder,
 )
 from content.collection_content import CollectionContent
@@ -39,6 +40,7 @@ class ContentCallback(Callback[Content, ContentCharacteristic, CallbackTuple]):
     __options: ContentOptions
     __classifier: Classifier
     __name_parser: NameParser
+    __scanner: LanguageScanner
     __progress_bars: dict[str, Any]
     __manager: Manager
     __status_bar: Any
@@ -48,12 +50,14 @@ def __init__(
         options: ContentOptions,
         classifier: Classifier,
         name_parser: NameParser,
+        scanner: LanguageScanner,
     ) -> None:
         super().__init__()
 
         self.__options = options
         self.__classifier = classifier
         self.__name_parser = name_parser
+        self.__scanner = scanner
         self.__progress_bars = {}
         manager = get_manager()
         if not isinstance(manager, Manager):
@@ -72,7 +76,7 @@ def __init__(
 
     @override
     def get_saved(self: Self) -> CallbackTuple:
-        return (self.__manager, self.__classifier)
+        return (self.__manager, self.__classifier, self.__scanner)
 
     @override
     def ignore(
@@ -233,7 +237,7 @@ def load_from_file(file_path: Path) -> list[Content]:
                 )
                 return json_loaded
             case _:
-                msg = f"Not loadable from '{suffix}' file!"
+                msg = f"Data not loadable from '{suffix}' file!"
                 raise RuntimeError(msg)
 
 
@@ -254,7 +258,7 @@ def save_to_file(file_path: Path, contents: list[Content]) -> None:
                 json_content: str = json.dumps(encoded_dict, indent=4)
                 file.write(json_content)
             case _:
-                msg = f"Not loadable from '{suffix}' file!"
+                msg = f"Data not saveable from '{suffix}' file!"
                 raise RuntimeError(msg)
 
 
@@ -263,9 +267,10 @@ def parse_contents(
     options: ContentOptions,
     save_file: Path,
     name_parser: NameParser,
+    scanner: LanguageScanner,
 ) -> list[Content]:
     classifier = Classifier()
-    callback = ContentCallback(options, classifier, name_parser)
+    callback = ContentCallback(options, classifier, name_parser, scanner)
 
     if not save_file.exists():
         contents: list[Content] = process_folder(