Skip to content

Commit

Permalink
feat(plaintext): add typer cli
Browse files Browse the repository at this point in the history
  • Loading branch information
spool committed Aug 23, 2023
1 parent 279b940 commit 2d591ae
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 9 deletions.
28 changes: 28 additions & 0 deletions alto2txt2fixture/cli.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,39 @@
import os
from pathlib import Path

import typer
from rich.table import Table
from typing_extensions import Annotated

from .plaintext import (
DEFAULT_EXTRACTED_SUBDIR,
DEFAULT_PLAINTEXT_FIXTURE_OUTPUT,
PlainTextFixture,
)
from .settings import DATA_PROVIDER_INDEX, SETUP_TITLE, settings
from .types import dotdict
from .utils import check_newspaper_collection_configuration, console, gen_fixture_tables

cli = typer.Typer(pretty_exceptions_show_locals=False)


@cli.command()
def plaintext(
    path: Annotated[Path, typer.Argument()],
    save_path: Annotated[Path, typer.Option()] = Path(DEFAULT_PLAINTEXT_FIXTURE_OUTPUT),
    data_provider_code: Annotated[str, typer.Option()] = "",
    extract_path: Annotated[Path, typer.Argument()] = Path(DEFAULT_EXTRACTED_SUBDIR),
) -> None:
    """Create a PlainTextFixture and save to `save_path`."""
    # NOTE: typer uses the docstring above as the command's `--help` text,
    # so it is user-facing output; extra detail lives in comments instead.
    #
    # Map the command-line parameters onto the `PlainTextFixture` keyword
    # arguments: `extract_path` feeds `extract_subdir` and `save_path`
    # feeds `export_directory`.
    fixture_options = {
        "path": path,
        "data_provider_code": data_provider_code,
        "extract_subdir": extract_path,
        "export_directory": save_path,
    }
    fixture = PlainTextFixture(**fixture_options)

    # Extraction runs first, then the JSON fixture export.
    fixture.extract_compressed()
    fixture.export_to_json_fixtures()


def show_setup(clear: bool = True, title: str = SETUP_TITLE, **kwargs) -> None:
"""Generate a `rich.table.Table` for printing configuration to console."""
Expand Down
14 changes: 7 additions & 7 deletions alto2txt2fixture/plaintext.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@

FULLTEXT_DJANGO_MODEL: Final[str] = "fulltext.fulltext"

HOME_DIR: PathLike = Path.home()
DOWNLOAD_DIR: PathLike = HOME_DIR / "metadata-db/"
ARCHIVE_SUBDIR: PathLike = Path("archives")
EXTRACTED_SUBDIR: PathLike = Path("extracted")
# HOME_DIR: PathLike = Path.home()
# DOWNLOAD_DIR: PathLike = HOME_DIR / "metadata-db/"
# ARCHIVE_SUBDIR: PathLike = Path("archives")
DEFAULT_EXTRACTED_SUBDIR: Final[PathLike] = Path("extracted")
FULLTEXT_METHOD: str = "download"
FULLTEXT_CONTAINER_SUFFIX: str = "-alto2txt"
FULLTEXT_CONTAINER_PATH: PathLike = Path("plaintext/")
Expand Down Expand Up @@ -183,12 +183,12 @@ class PlainTextFixture:
# mount_path: PathLike | None = Path(settings.MOUNTPOINT)
data_provider: DataProviderFixtureDict | None = None
model_str: str = FULLTEXT_DJANGO_MODEL
archive_subdir: PathLike = ARCHIVE_SUBDIR
extract_subdir: PathLike = EXTRACTED_SUBDIR
# archive_subdir: PathLike = ARCHIVE_SUBDIR
extract_subdir: PathLike = DEFAULT_EXTRACTED_SUBDIR
plaintext_extension: str = TXT_FIXTURE_FILE_EXTENSION
plaintext_glob_regex: str = TXT_FIXTURE_FILE_GLOB_REGEX
# decompress_subdir: PathLike = FULLTEXT_DECOMPRESSED_PATH
download_dir: PathLike = DOWNLOAD_DIR
# download_dir: PathLike = DOWNLOAD_DIR
fulltext_container_suffix: str = FULLTEXT_CONTAINER_SUFFIX
data_provider_code_dict: dict[str, DataProviderFixtureDict] = field(
default_factory=lambda: NEWSPAPER_DATA_PROVIDER_CODE_DICT
Expand Down
37 changes: 36 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pytz = "^2022.7.1"
rich = "^12.6.0"
types-pytz = "^2023.3.0.0"
python-slugify = "^8.0.1"
typer = {extras = ["all"], version = "^0.9.0"}

[tool.poetry.group.dev.dependencies]
pytest-sugar = "^0.9.7"
Expand Down Expand Up @@ -45,7 +46,8 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
a2t2f-news = "alto2txt2fixture.__main__:run"
a2tsf-adj = "alto2txt2fixture.create_adjacent_tables:run"
a2t2f-adj = "alto2txt2fixture.create_adjacent_tables:run"
a2t2f-plaintext = "alto2txt2fixture.cli:cli"

[tool.pytest.ini_options]
xfail_strict = true
Expand Down
41 changes: 41 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import json

from typer.testing import CliRunner

from alto2txt2fixture.cli import cli
from alto2txt2fixture.types import FixtureDict

runner = CliRunner()


def test_plaintext_cli(tmpdir):
    """Test running `plaintext` file export via `cli`.

    Invokes the command against the `tests/bl_lwm/` example data, exporting
    into a temporary directory, then checks the exit code, the console
    output and the first record of the first exported JSON fixture file.
    """
    export_dir = tmpdir / "test-cli-plaintext-fixture"
    result = runner.invoke(
        cli,
        [
            "tests/bl_lwm/",
            "--save-path",
            # Command-line arguments are strings: convert the path object
            # explicitly rather than relying on implicit coercion.
            str(export_dir),
            "--data-provider-code",
            "bl_lwm",
        ],
    )
    assert result.exit_code == 0
    assert "Extract path: tests/bl_lwm/extracted" in result.stdout

    # `json.load` expects an open file object, not a path, so open the
    # exported fixture explicitly (and close it deterministically).
    fixture_path = export_dir / "plaintext_fixture-1.json"
    with open(fixture_path, encoding="utf-8") as fixture_file:
        exported_json: list[FixtureDict] = json.load(fixture_file)

    first_record = exported_json[0]
    fields = first_record["fields"]
    assert first_record["model"] == "fulltext.fulltext"
    assert "NEW TREDEGAR & BARGOED" in fields["text"]
    assert (
        fields["path"]
        == "tests/bl_lwm/extracted/0003548/1904/0630/0003548_19040630_art0002.txt"
    )
    assert fields["compressed_path"] == "tests/bl_lwm/0003548-test_plaintext.zip"
    # The original assertion compared `updated_at` with itself, which can
    # never fail. A freshly exported record should carry identical creation
    # and update timestamps.
    # NOTE(review): assumes the exported fields include `created_at` --
    # confirm against the fixture field definition.
    assert fields["created_at"] == fields["updated_at"]

0 comments on commit 2d591ae

Please sign in to comment.