Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename file numbering #62

Merged
merged 11 commits into from
Sep 28, 2023
61 changes: 58 additions & 3 deletions alto2txt2fixture/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@
)
from .settings import DATA_PROVIDER_INDEX, SETUP_TITLE, settings
from .types import dotdict
from .utils import check_newspaper_collection_configuration, console, gen_fixture_tables
from .utils import (
FILE_NAME_0_PADDING_DEFAULT,
check_newspaper_collection_configuration,
console,
copy_dict_paths,
gen_fixture_tables,
glob_path_rename_by_0_padding,
)

# Typer application object; commands such as `reindex` register on it via
# `@cli.command()`. Local variables are left out of pretty exception output.
cli = typer.Typer(pretty_exceptions_show_locals=False)

Expand All @@ -38,6 +45,9 @@ def plaintext(
records_per_json: Annotated[
int, typer.Option(help="Max records per json fixture")
] = DEFAULT_MAX_PLAINTEXT_PER_FIXTURE_FILE,
digit_padding: Annotated[
int, typer.Option(help="Padding '0's for indexing json fixture filenames")
] = FILE_NAME_0_PADDING_DEFAULT,
) -> None:
"""Create a PlainTextFixture and save to `save_path`."""
plaintext_fixture = PlainTextFixture(
Expand All @@ -47,15 +57,17 @@ def plaintext(
export_directory=save_path,
initial_pk=initial_pk,
max_plaintext_per_fixture_file=records_per_json,
json_0_file_name_padding=digit_padding,
)
plaintext_fixture.info()
while (
not plaintext_fixture.compressed_files
and not plaintext_fixture.plaintext_provided_uncompressed
):
try_another_compressed_txt_source: bool = Confirm.ask(
f"No .txt files available from extract path: {plaintext_fixture.trunc_extract_path_str}\n"
"Would you like to extract fixtures from a different path?"
f"No .txt files available from extract path: "
f"{plaintext_fixture.trunc_extract_path_str}\n"
f"Would you like to extract fixtures from a different path?"
)
if try_another_compressed_txt_source:
new_extract_path: str = Prompt.ask("Please enter a new extract path")
Expand All @@ -67,6 +79,49 @@ def plaintext(
plaintext_fixture.export_to_json_fixtures()


@cli.command()
def reindex(
    path: Annotated[Path, typer.Argument(help="Path to files to rename")],
    folder: Annotated[Path, typer.Option(help="Path to save renamed files")] = Path(),
    regex: Annotated[str, typer.Option(help="Regex to filter files to rename")] = "*",
    padding: Annotated[
        int, typer.Option(help="How many digits to pad by (guessed if blank)")
    ] = FILE_NAME_0_PADDING_DEFAULT,
    dry_run: Annotated[
        bool, typer.Option(help="Show example paths without copy")
    ] = True,
) -> None:
    """Rename files for ordering.

    Prints the source and destination folders and the old and new file
    names. With `dry_run` enabled (the default) a confirmation is requested
    before anything is copied; with it disabled the copy runs directly.
    """
    # Mapping of each matched current path to its proposed new path.
    # NOTE(review): `regex` is forwarded as `glob_regex_str` and defaults to
    # "*", so it is presumably a glob pattern rather than a regular
    # expression -- confirm against `glob_path_rename_by_0_padding`.
    rename_paths_dict: dict[os.PathLike, os.PathLike] = glob_path_rename_by_0_padding(
        path=path,
        output_path=folder,
        glob_regex_str=regex,
        padding=padding,
    )

    # Summary of the source and destination folders.
    paths_table: Table = Table(title="Copy and rename folders")
    paths_table.add_column("From Folder", justify="right", style="cyan")
    paths_table.add_column("New Folder", style="magenta")
    paths_table.add_row(str(path), str(folder))
    console.print(paths_table)

    # Per-file preview: only the file names are shown, not the full paths.
    file_names_table: Table = Table(title="Old and New File Names")
    file_names_table.add_column("Current File Name", justify="right", style="cyan")
    file_names_table.add_column("New File Name", style="magenta")
    for old_path, new_path in rename_paths_dict.items():
        file_names_table.add_row(Path(old_path).name, Path(new_path).name)
    console.print(file_names_table)

    # Without `dry_run` the copy proceeds unprompted; with it, the user
    # decides after seeing the preview tables above.
    make_copy: bool = not dry_run
    if dry_run:
        make_copy = Confirm.ask(
            f"Would you like to copy these {len(rename_paths_dict)} "
            f"files from Current:\n'{path}'\nto New:\n'{folder}'\n"
        )
    if make_copy:
        copy_dict_paths(rename_paths_dict)


def show_setup(clear: bool = True, title: str = SETUP_TITLE, **kwargs) -> None:
"""Generate a `rich.table.Table` for printing configuration to console."""
if clear and os.name == "posix":
Expand Down
26 changes: 22 additions & 4 deletions alto2txt2fixture/plaintext.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
PlaintextFixtureFieldsDict,
)
from .utils import (
FILE_NAME_0_PADDING_DEFAULT,
TRUNC_HEADS_PATH_DEFAULT,
TRUNC_TAILS_PATH_DEFAULT,
ZIP_FILE_EXTENSION,
Expand Down Expand Up @@ -128,6 +129,9 @@ class PlainTextFixture:
Default begins at 1, can be set to another number if needed to
add more to a pre-existing set of records up to a given `pk`

json_0_file_name_padding:
Number of `0`s to prefix file name numbering.

_disk_usage:
Available harddrive space. Designed to help mitigate decompressing too
many files for available disk space.
Expand Down Expand Up @@ -158,6 +162,7 @@ class PlainTextFixture:
│ Data Provider │ 'Living with Machines' ...│
│ Initial Primary Key │ 1 ...│
│ Max Rows Per JSON │ 100 ...│
│ JSON File Name 0s │ 6 ...│
└─────────────────────┴────────────────────────────────...┘
>>> plaintext_bl_lwm.free_hd_space_in_GB > 1
True
Expand Down Expand Up @@ -190,6 +195,7 @@ class PlainTextFixture:
saved_fixture_prefix: str = DEFAULT_PLAINTEXT_FILE_NAME_PREFIX
export_directory: PathLike = DEFAULT_PLAINTEXT_FIXTURE_OUTPUT
empty_info_default_str: str = "None"
json_0_file_name_padding: int = FILE_NAME_0_PADDING_DEFAULT
_trunc_head_paths: int = TRUNC_HEADS_PATH_DEFAULT
_trunc_tails_paths: int = TRUNC_TAILS_PATH_DEFAULT
_trunc_tails_sub_paths: int = TRUNC_TAILS_SUBPATH_DEFAULT
Expand Down Expand Up @@ -256,7 +262,7 @@ def trunc_extract_path_str(self) -> str:

@property
def info_table(self) -> str:
"""Generate a `rich.ltable.Table` of config information.
"""Generate a `rich.table.Table` of config information.

Example:
```pycon
Expand All @@ -278,6 +284,7 @@ def info_table(self) -> str:
table.add_row("Data Provider", f"'{str(self.data_provider_name)}'")
table.add_row("Initial Primary Key", str(self.initial_pk))
table.add_row("Max Rows Per JSON", str(self.max_plaintext_per_fixture_file))
table.add_row("JSON File Name 0s", str(self.json_0_file_name_padding))
return table

def info(self) -> None:
Expand Down Expand Up @@ -401,7 +408,7 @@ def extract_path(self) -> Path:

@property
def compressed_files(self) -> tuple[PathLike, ...]:
"""Return a tuple of all `self.files` with known archive filenames."""
"""Return a tuple of all `self.files` with known archive file names."""
return (
tuple(sorted(valid_compression_files(files=self.files)))
if self.files
Expand Down Expand Up @@ -593,7 +600,10 @@ def plaintext_paths_to_dicts(self) -> Generator[PlaintextFixtureDict, None, None
)

def export_to_json_fixtures(
self, output_path: PathLike | None = None, prefix: str | None = None
self,
output_path: PathLike | None = None,
prefix: str | None = None,
json_0_file_name_padding: int | None = None,
) -> None:
"""Iterate over `self.plaintext_paths` exporting to `json` `django` fixtures.

Expand All @@ -606,6 +616,8 @@ def export_to_json_fixtures(
Folder to save all `json` fixtures in.
prefix:
Any `str` prefix for saved fixture files.
json_0_file_name_padding:
Number of `0`s to prefix file name numbering.

Example:
```pycon
Expand All @@ -624,7 +636,7 @@ def export_to_json_fixtures(
>>> len(plaintext_bl_lwm._exported_json_paths)
1
>>> plaintext_bl_lwm._exported_json_paths
(...Path(...plaintext_fixture-1.json...),)
(...Path(...plaintext_fixture-000001.json...),)
>>> import json
>>> exported_json = json.loads(
... plaintext_bl_lwm._exported_json_paths[0].read_text()
Expand Down Expand Up @@ -652,12 +664,18 @@ def export_to_json_fixtures(
"""
output_path = self.export_directory if not output_path else output_path
prefix = self.saved_fixture_prefix if not prefix else prefix
json_0_file_name_padding = (
self.json_0_file_name_padding
if not json_0_file_name_padding
else json_0_file_name_padding
)
save_fixture(
self.plaintext_paths_to_dicts(),
prefix=prefix,
output_path=output_path,
add_created=True,
max_elements_per_file=self.max_plaintext_per_fixture_file,
file_name_0_padding=json_0_file_name_padding,
)
self._exported_json_paths = tuple(
Path(path) for path in sorted(Path(output_path).glob(f"**/{prefix}*.json"))
Expand Down
Loading