Skip to content

Commit

Permalink
add Language scanner:
Browse files Browse the repository at this point in the history
 - use it instead of the global itr counter; it allows scanning only a certain number of languages
 - add PartialLanguageScanner as class, that takes a config and only scans a few languages, see README.md for an example config file
 - add FullLanguageScanner as option, to scan everything
  • Loading branch information
Totto16 committed Aug 14, 2023
1 parent 43aed17 commit 00f23a7
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ model/
/.vscode
/.coverage
/coverage.xml
/config.ini
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,12 @@ VERSION 2: Breaking changes (how the data is stored) !
## Customize

Folder, title etc.


## config.ini example

```ini
[settings]
scan_amount = 100
start_position = 0
```
27 changes: 23 additions & 4 deletions src/content/base_class.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataclasses import dataclass, field
from enum import Enum
from os import listdir
from pathlib import Path
from typing import Any, Optional, Self, TypedDict
Expand All @@ -10,6 +11,7 @@
from content.general import (
Callback,
ContentType,
EpisodeDescription,
MissingOverrideError,
ScannedFile,
ScannedFileType,
Expand All @@ -25,8 +27,21 @@ class ContentDict(TypedDict):
scanned_file: ScannedFile


# TODO
CallbackTuple = tuple[Manager, Classifier]
class ScanType(Enum):
    """Kind of scan for which a language-scan decision is being requested.

    A member of this enum is passed as ``scan_type`` to
    ``LanguageScanner.should_scan``.
    """

    # NOTE(review): presumably used for content that is scanned for the
    # first time - confirm against the callers of ``should_scan``.
    first_scan = "first_scan"
    # NOTE(review): presumably used when already known content is scanned
    # again - confirm against the callers of ``should_scan``.
    rescan = "rescan"


class LanguageScanner:
    """Base class for objects that decide whether a language scan is done.

    The base implementation of ``should_scan`` only raises
    ``MissingOverrideError``; subclasses are expected to override it.
    """

    def should_scan(
        self: Self,
        description: EpisodeDescription,  # noqa: ARG002
        scan_type: ScanType,  # noqa: ARG002
    ) -> bool:
        """Return whether the language of the described episode is scanned.

        Args:
            description: description of the episode the decision is for.
            scan_type: kind of scan that is currently running.

        Raises:
            MissingOverrideError: always, in this base implementation.
        """
        raise MissingOverrideError


CallbackTuple = tuple[Manager, Classifier, LanguageScanner]


@dataclass(slots=True, repr=True)
Expand Down Expand Up @@ -55,7 +70,9 @@ def generate_checksum(self: Self, manager: Manager) -> None:
def scan(
self: Self,
callback: Callback[ # noqa: ARG002
"Content", ContentCharacteristic, CallbackTuple,
"Content",
ContentCharacteristic,
CallbackTuple,
],
*,
parent_folders: list[str], # noqa: ARG002
Expand Down Expand Up @@ -97,7 +114,9 @@ def process_folder(
results: list[Content] = []
for file_path, file_type, parent_folders in temp:
result: Optional[Content] = callback.process(
file_path, file_type, parent_folders,
file_path,
file_type,
parent_folders,
)
value = (
result.type if result is not None else None,
Expand Down
41 changes: 6 additions & 35 deletions src/content/episode_content.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import (
Literal,
Expand All @@ -17,6 +16,7 @@
Content,
ContentCharacteristic,
ContentDict,
ScanType,
)
from content.general import (
Callback,
Expand All @@ -34,26 +34,6 @@ class EpisodeContentDict(ContentDict):
language: Language


# TODO: remove
GLOBAL_ITER_MAX: int = 200
SKIP_ITR: int = 530
itr: int = 0


# TODO: remove
def itr_print_percent() -> None:
    """Print how far the global ``itr`` counter is through the scan window.

    The window starts at ``SKIP_ITR`` and is ``GLOBAL_ITER_MAX`` steps long;
    nothing is printed while ``itr`` lies outside of it.
    """
    global itr  # noqa: PLW0602

    window_end: int = GLOBAL_ITER_MAX + SKIP_ITR
    if not (SKIP_ITR <= itr < window_end):
        return

    done: int = itr - SKIP_ITR
    percent: float = done / GLOBAL_ITER_MAX * 100.0
    print(f"{percent:.02f} %")


@schema(extra=narrow_type(("type", Literal[ContentType.episode])))
@dataclass(slots=True, repr=True)
class EpisodeContent(Content):
Expand Down Expand Up @@ -145,7 +125,7 @@ def scan(
parent_folders: list[str],
rescan: bool = False,
) -> None:
manager, classifier = callback.get_saved()
manager, classifier, scanner = callback.get_saved()

characteristic: ContentCharacteristic = (self.type, self.scanned_file.type)

Expand All @@ -161,13 +141,8 @@ def scan(
characteristic,
)

# TODO: remove
global itr # noqa: PLW0603
if itr < GLOBAL_ITER_MAX + SKIP_ITR:
itr_print_percent()
itr = itr + 1
if itr >= SKIP_ITR:
self.__language = self.__get_language(classifier, manager)
if scanner.should_scan(self.__description, ScanType.rescan):
self.__language = self.__get_language(classifier, manager)

callback.progress(
self.scanned_file.path.name,
Expand Down Expand Up @@ -197,12 +172,8 @@ def scan(
characteristic,
)

# TODO: re-enable
if itr < GLOBAL_ITER_MAX + SKIP_ITR:
itr_print_percent()
itr = itr + 1
if itr >= SKIP_ITR:
self.__language = self.__get_language(classifier, manager)
if scanner.should_scan(self.__description, ScanType.first_scan):
self.__language = self.__get_language(classifier, manager)

callback.progress(
self.scanned_file.path.name,
Expand Down
2 changes: 1 addition & 1 deletion src/content/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from enlighten import Manager


class ScannedFileType(str, Enum):
class ScannedFileType(Enum):
file = "file"
folder = "folder"

Expand Down
130 changes: 130 additions & 0 deletions src/content/scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import json
from configparser import ConfigParser
from pathlib import Path
from typing import Optional, Self, TypedDict

from helper.timestamp import parse_int_safely
from typing_extensions import override

from content.base_class import LanguageScanner, ScanType
from content.general import EpisodeDescription


class StaticLanguageScanner(LanguageScanner):
    """Language scanner that gives the same answer for every episode.

    The answer is fixed once, at construction time.
    """

    __value: bool

    def __init__(self: Self, *, value: bool) -> None:
        """Remember the constant answer.

        Args:
            value: result that every ``should_scan`` call returns.
        """
        super().__init__()
        self.__value = value

    @override
    def should_scan(
        self: Self,
        description: EpisodeDescription,  # noqa: ARG002
        scan_type: ScanType,  # noqa: ARG002
    ) -> bool:
        """Return the stored constant; both arguments are ignored."""
        return self.__value


class FullLanguageScanner(StaticLanguageScanner):
    """Scanner that requests a language scan for every episode."""

    def __init__(self: Self) -> None:
        """Configure the static scanner to always answer ``True``."""
        super().__init__(value=True)


class NoLanguageScanner(StaticLanguageScanner):
    """Scanner that never requests a language scan for any episode."""

    def __init__(self: Self) -> None:
        """Configure the static scanner to always answer ``False``."""
        # The "no scan" scanner has to answer False; passing True here made
        # it behave exactly like FullLanguageScanner.
        super().__init__(value=False)


class PartialScannerDict(TypedDict, total=False):
    """Partial-scanner settings as loaded from a config file.

    ``total=False``: every key may be missing.
    """

    # Setting read from the ``start_position`` config key.
    start_position: int
    # Setting read from the ``scan_amount`` config key.
    scan_amount: int


class PartialScannerDictTotal(TypedDict, total=True):
    """Complete set of partial-scanner settings.

    ``total=True``: both keys are always present.
    """

    # Same keys as ``PartialScannerDict``, but required here.
    start_position: int
    scan_amount: int


INI_SETTINGS_SECTION_KEY = "settings"


class PartialLanguageScanner(LanguageScanner):
__start_position: int
__scan_amount: int
__current_position: int

@property
def __defaults(self: Self) -> PartialScannerDictTotal:
return {"start_position": 0, "scan_amount": 100}

def __init__(self: Self, config_file: Path = Path("./config.ini")) -> None:
super().__init__()
loaded_dict: Optional[PartialScannerDict] = None

if config_file.exists():
with config_file.open(mode="r") as file:
suffix: str = config_file.suffix[1:]
match suffix:
case "json":
loaded_dict = json.load(file)
case "ini":
config = ConfigParser()
config.read(config_file)
if INI_SETTINGS_SECTION_KEY in config:
temp_dict = dict(config.items(INI_SETTINGS_SECTION_KEY))

loaded_dict = {}
if temp_dict.get("start_position", None) is not None:
int_result = parse_int_safely(
temp_dict["start_position"],
)
if int_result is not None:
loaded_dict["start_position"] = int_result
if temp_dict.get("scan_amount", None) is not None:
int_result = parse_int_safely(
temp_dict["scan_amount"],
)
if int_result is not None:
loaded_dict["scan_amount"] = int_result

case _:
msg = f"Config not loadable from '{suffix}' file!"
raise RuntimeError(msg)

if loaded_dict is not None:
self.__start_position = loaded_dict.get(
"start_position",
self.__defaults["start_position"],
)
self.__scan_amount = loaded_dict.get(
"scan_amount",
self.__defaults["scan_amount"],
)
else:
self.__start_position = self.__defaults["start_position"]
self.__scan_amount = self.__defaults["scan_amount"]

self.__current_position = 0

@override
def should_scan(
self: Self,
description: EpisodeDescription,
scan_type: ScanType,
) -> bool:
if (self.__start_position >= self.__current_position) and (
self.__start_position < (self.__current_position + self.__scan_amount)
):
self.__current_position += 1

self.__current_position += 1
return False


# TODO add time based scanner
# TODO optionally ask user for input each x steps as option for other scanners
3 changes: 2 additions & 1 deletion src/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from classifier import Language
from content.base_class import Content # noqa: TCH002
from content.general import NameParser, Summary
from content.scanner import PartialLanguageScanner
from helper.timestamp import parse_int_safely
from main import AllContent, generate_json_schema, parse_contents
from typing_extensions import override
Expand Down Expand Up @@ -105,7 +106,7 @@ def main() -> None:
},
Path("data/data.json"),
name_parser=CustomNameParser(SPECIAL_NAMES),
# TODO: make it possible to define to do a full scan or not (full= with language detection, not full = without)
scanner=PartialLanguageScanner(Path("./config.ini")),
)

summaries = [content.summary() for content in contents]
Expand Down
13 changes: 9 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
CallbackTuple,
Content,
ContentCharacteristic,
LanguageScanner,
process_folder,
)
from content.collection_content import CollectionContent
Expand Down Expand Up @@ -39,6 +40,7 @@ class ContentCallback(Callback[Content, ContentCharacteristic, CallbackTuple]):
__options: ContentOptions
__classifier: Classifier
__name_parser: NameParser
__scanner: LanguageScanner
__progress_bars: dict[str, Any]
__manager: Manager
__status_bar: Any
Expand All @@ -48,12 +50,14 @@ def __init__(
options: ContentOptions,
classifier: Classifier,
name_parser: NameParser,
scanner: LanguageScanner,
) -> None:
super().__init__()

self.__options = options
self.__classifier = classifier
self.__name_parser = name_parser
self.__scanner = scanner
self.__progress_bars = {}
manager = get_manager()
if not isinstance(manager, Manager):
Expand All @@ -72,7 +76,7 @@ def __init__(

@override
def get_saved(self: Self) -> CallbackTuple:
return (self.__manager, self.__classifier)
return (self.__manager, self.__classifier, self.__scanner)

@override
def ignore(
Expand Down Expand Up @@ -233,7 +237,7 @@ def load_from_file(file_path: Path) -> list[Content]:
)
return json_loaded
case _:
msg = f"Not loadable from '{suffix}' file!"
msg = f"Data not loadable from '{suffix}' file!"
raise RuntimeError(msg)


Expand All @@ -254,7 +258,7 @@ def save_to_file(file_path: Path, contents: list[Content]) -> None:
json_content: str = json.dumps(encoded_dict, indent=4)
file.write(json_content)
case _:
msg = f"Not loadable from '{suffix}' file!"
msg = f"Data not saveable from '{suffix}' file!"
raise RuntimeError(msg)


Expand All @@ -263,9 +267,10 @@ def parse_contents(
options: ContentOptions,
save_file: Path,
name_parser: NameParser,
scanner: LanguageScanner,
) -> list[Content]:
classifier = Classifier()
callback = ContentCallback(options, classifier, name_parser)
callback = ContentCallback(options, classifier, name_parser, scanner)

if not save_file.exists():
contents: list[Content] = process_folder(
Expand Down

0 comments on commit 00f23a7

Please sign in to comment.