Move load_product_lists to _utils.py
avalentino committed Nov 19, 2023
1 parent 92a7c20 commit 30c8770
Showing 4 changed files with 272 additions and 269 deletions.
25 changes: 24 additions & 1 deletion asfsmd/_utils.py
@@ -1,6 +1,11 @@
"""Utility functions for asfsmd."""

from typing import Any, Iterable, List
import json
import pathlib
import collections
from typing import Any, Dict, Iterable, List

from .common import PathType


def unique(data: Iterable[Any]) -> List[Any]:
@@ -12,3 +17,21 @@ def unique(data: Iterable[Any]) -> List[Any]:
            unique_items.append(item)
            unique_items_set.add(item)
    return unique_items


def load_product_lists(*filenames: PathType) -> Dict[str, List[str]]:
    """Load product lists from files."""
    data: Dict[str, List[str]] = collections.defaultdict(list)
    for filename in filenames:
        filename = pathlib.Path(filename)
        if filename.suffix == ".json":
            data.update(json.loads(filename.read_text()))
        else:
            with filename.open() as fd:
                for line in fd:
                    line = line.strip()
                    if not line or line.startswith("#"):
                        continue
                    data[""].append(line)

    return {key: unique(values) for key, values in data.items()}
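
For context, a minimal usage sketch of the relocated helper: JSON inputs contribute named groups, plain-text inputs are appended to the default "" group, and each group is deduplicated while preserving order. The input file names here (groups.json, extra_products.txt) are hypothetical examples, not files from the repository.

    from asfsmd._utils import load_product_lists

    # "groups.json" and "extra_products.txt" are made-up example paths.
    products = load_product_lists("groups.json", "extra_products.txt")
    for group, names in products.items():
        print(group or "<default>", names)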
25 changes: 4 additions & 21 deletions asfsmd/cli.py
@@ -2,7 +2,7 @@

# PYTHON_ARGCOMPLETE_OK

import json

import logging
import pathlib
import argparse
@@ -19,8 +19,8 @@
    make_patterns,
    _get_auth,
)
from ._utils import unique
from .common import BLOCKSIZE, MB, PathType
from ._utils import unique, load_product_lists
from .common import BLOCKSIZE, MB

try:
from os import EX_OK
@@ -32,23 +32,6 @@
LOGFMT = "%(asctime)s %(levelname)-8s -- %(message)s"


def _load_product_lists(*filenames: PathType) -> Dict[str, List[str]]:
    data: Dict[str, List[str]] = collections.defaultdict(list)
    for filename in filenames:
        filename = pathlib.Path(filename)
        if filename.suffix == ".json":
            data.update(json.loads(filename.read_text()))
        else:
            with filename.open() as fd:
                for line in fd:
                    line = line.strip()
                    if not line or line.startswith("#"):
                        continue
                    data[""].append(line)

    return {key: unique(values) for key, values in data.items()}


def asfsmd_cli(
    inputs: Iterable[str],
    beam: Optional[str] = "*",
@@ -89,7 +72,7 @@ def asfsmd_cli(
    else:
        products_tree: Dict[str, List[str]] = collections.defaultdict(list)
        if file_list:
            products_tree = _load_product_lists(*inputs)
            products_tree = load_product_lists(*inputs)
        else:
            # Ignore if user passed files with .zip or .SAFE extensions
            products_tree[""].extend(
244 changes: 243 additions & 1 deletion tests/test__utils.py
@@ -4,7 +4,7 @@

import pytest

from asfsmd._utils import unique
from asfsmd._utils import unique, load_product_lists


@pytest.mark.parametrize(
@@ -23,3 +23,245 @@
)
def test_unique(in_, out):
    assert unique(in_) == out


@pytest.mark.parametrize(
    ["idata", "odata"],
    [
        pytest.param(
            """\
{
"": [
"filelist01.txt",
"filelist02.txt"
],
"a": [
"a01.txt",
"a02.txt"
],
"b/1": [
"b01.txt",
"b02.txt"
]
}
""",
            {
                "": ["filelist01.txt", "filelist02.txt"],
                "a": ["a01.txt", "a02.txt"],
                "b/1": ["b01.txt", "b02.txt"],
            },
            id="unique",
        ),
        pytest.param(
            """\
{
"": [
"filelist01.txt",
"filelist02.txt",
"a02.txt"
],
"a": [
"a01.txt",
"a02.txt"
],
"b/1": [
"b01.txt",
"b02.txt"
]
}
""",
            {
                "": ["filelist01.txt", "filelist02.txt", "a02.txt"],
                "a": ["a01.txt", "a02.txt"],
                "b/1": ["b01.txt", "b02.txt"],
            },
            id="unique-per-section",
        ),
        pytest.param(
            """\
{
"": [
"filelist01.txt",
"filelist02.txt",
"filelist01.txt"
],
"a": [
"a01.txt",
"a02.txt",
"a02.txt"
],
"b/1": [
"b01.txt",
"b02.txt"
]
}
""",
            {
                "": ["filelist01.txt", "filelist02.txt"],
                "a": ["a01.txt", "a02.txt"],
                "b/1": ["b01.txt", "b02.txt"],
            },
            id="duplicate",
        ),
    ],
)
def test__load_product_lists_json(idata, odata, tmp_path):
    jsonfile = tmp_path / "productlist.json"
    jsonfile.write_text(idata)
    data = load_product_lists(jsonfile)
    assert data == odata


@pytest.mark.parametrize(
    ["idata", "odata"],
    [
        pytest.param(
            """\
filelist01.txt
filelist02.txt
filelist03.txt
""",
            {
                "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"],
            },
            id="unique",
        ),
        pytest.param(
            """\
# comment line
filelist01.txt
filelist02.txt
    # indented comment line
filelist03.txt
""",
            {
                "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"],
            },
            id="unique-with-comment",
        ),
        pytest.param(
            # NOTE: filelist01.txt has trailing spaces
            (
                "filelist01.txt \n"
                "\n"
                "filelist02.txt\n"
                " \n"
                " filelist03.txt \n"
            ),
            {
                "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"],
            },
            id="unique-with-empty-line",
        ),
        pytest.param(
            """\
filelist01.txt
filelist02.txt
filelist03.txt
filelist03.txt
""",
            {
                "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"],
            },
            id="duplicate",
        ),
        pytest.param(
            """\
# comment
filelist01.txt
filelist02.txt
# duplicates
filelist03.txt
filelist03.txt
""",
            {
                "": ["filelist01.txt", "filelist02.txt", "filelist03.txt"],
            },
            id="duplicate-with-comments-and-empty-lines",
        ),
    ],
)
def test__load_product_lists_text(idata, odata, tmp_path):
    textfile = tmp_path / "productlist.txt"
    textfile.write_text(idata)
    data = load_product_lists(textfile)
    assert data == odata


@pytest.mark.parametrize(
    ["jsondata", "textdata", "odata"],
    [
        pytest.param(
            """\
{
"a": [
"a01.txt",
"a02.txt"
],
"b/1": [
"b01.txt",
"b02.txt"
]
}
""",
            """\
filelist01.txt
filelist02.txt
""",
            {
                "": [
                    "filelist01.txt",
                    "filelist02.txt",
                ],
                "a": [
                    "a01.txt",
                    "a02.txt",
                ],
                "b/1": [
                    "b01.txt",
                    "b02.txt",
                ],
            },
            id="unique",
        ),
        pytest.param(
            """\
{
"": [
"filelist01.txt",
"filelist02.txt"
],
"a": [
"a01.txt",
"a02.txt"
]
}
""",
            """\
filelist01.txt
filelist02.txt
filelist03.txt
""",
            {
                "": [
                    "filelist01.txt",
                    "filelist02.txt",
                    "filelist03.txt",
                ],
                "a": [
                    "a01.txt",
                    "a02.txt",
                ],
            },
            id="duplicate",
        ),
    ],
)
def test__load_product_lists_multifile(jsondata, textdata, odata, tmp_path):
    jsonfile = tmp_path / "jsonfile.json"
    jsonfile.write_text(jsondata)
    textfile = tmp_path / "textfile.txt"
    textfile.write_text(textdata)
    data = load_product_lists(jsonfile, textfile)
    assert data == odata
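
The new tests can be exercised on their own with a standard pytest invocation, for example (assuming pytest is installed and the command is run from the repository root):

    python -m pytest tests/test__utils.py -k load_product_lists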