diff --git a/src/vetr_summarizer/__main__.py b/src/vetr_summarizer/__main__.py
index 5c729a0..f80f155 100644
--- a/src/vetr_summarizer/__main__.py
+++ b/src/vetr_summarizer/__main__.py
@@ -1,17 +1,17 @@
 # -*- coding: utf-8 -*-
-from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
+import argparse
 from pathlib import Path
 
-from vetr_summarizer.html_generator import generate_html
-
-from . import __version__
+from vetr_summarizer import __version__
+from vetr_summarizer.config import Config
+from vetr_summarizer.main import VetrSummarizer
 
 
 def main():
-    parser = ArgumentParser(
+    parser = argparse.ArgumentParser(
         prog="vetr-summarizer",
-        formatter_class=ArgumentDefaultsHelpFormatter,
-        description="Process and summarize aci-vetr-data JSON files into pretty HTML reports.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="Process and summarize aci-vetr-data raw JSON files into pretty HTML reports.",
         epilog="Thanks for using %(prog)s! :)",
         add_help=True,
         allow_abbrev=True,
@@ -21,7 +21,7 @@ def main():
     parser.add_argument(
         "directory",
         type=Path,
-        help="A path to the directory containing the JSON files.",
+        help="Directory containing JSON files.",
     )
     parser.add_argument(
         "-f",
@@ -33,20 +33,24 @@ def main():
     )
     parser.add_argument(
         "-x",
-        "--exclude-file",
+        "--excluded-keys-file",
         type=Path,
         required=False,
-        default=Path(__file__).parent / "config/excluded_keys",
-        help="File containing keys to exclude from the summary report.",
+        default=Path(__file__).parent / "config" / "excluded_keys",
+        help="File with keys to exclude from raw JSON files.",
     )
     parser.add_argument(
         "-v", "--version", action="version", version=f"%(prog)s version {__version__}"
     )
     args = parser.parse_args()
 
-    output_file = generate_html(args.directory, args.exclude_file)
+    config = Config(
+        format=args.format,
+        excluded_keys_file=args.excluded_keys_file,
+    )
 
-    print(f"HTML output is written to {output_file}")
+    summarizer = VetrSummarizer(args.directory, config)
+    summarizer.summarize()
 
 
 if __name__ == "__main__":
diff --git a/src/vetr_summarizer/config.py b/src/vetr_summarizer/config.py
new file mode 100644
index 0000000..fdc6f14
--- /dev/null
+++ b/src/vetr_summarizer/config.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+from pathlib import Path
+
+
+class Config(object):
+    """Settings for a summarizer run, including the set of excluded keys."""
+
+    def __init__(
+        self,
+        format: str = "html",
+        excluded_keys_file: Path = "excluded_keys",
+        output_html: str = "vetr-summary.html",
+        template_file: str = "vetr-data.j2",
+    ):
+        self.format = format
+        self.template_file = template_file
+        self.output_html = output_html
+        self.excluded_keys = self.load_excluded_keys(excluded_keys_file)
+
+    def load_excluded_keys(self, file_path: Path) -> set:
+        """Return the keys listed one per line in ``file_path``.
+
+        A missing file, or a file without any keys, prints a warning and
+        results in an empty set.
+        """
+        # The constructor default is a plain string, so coerce before calling
+        # Path-only methods such as exists().
+        file_path = Path(file_path)
+
+        if not file_path.exists():
+            print(f"WARNING: {file_path} does not exist! No keys will be excluded.")
+            return set()
+
+        # Read the file once; blank and whitespace-only lines are not keys.
+        stripped = (line.strip() for line in file_path.read_text().splitlines())
+        keys = {key for key in stripped if key}
+        if not keys:
+            print(
+                f"WARNING: {file_path} does exists but without keys! No keys will be excluded."
+            )
+        return keys
diff --git a/src/vetr_summarizer/html_generator.py b/src/vetr_summarizer/html_generator.py
deleted file mode 100644
index 8b1edab..0000000
--- a/src/vetr_summarizer/html_generator.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# -*- coding: utf-8 -*-
-import json
-from pathlib import Path
-
-from jinja2 import Environment, FileSystemLoader
-
-
-def load_excluded_keys(file_path: Path) -> set:
-    if not file_path.exists():
-        print(f"WARNING: {file_path} does not exist! No keys will be excluded.")
-        return set()
-
-    if file_path.exists() and not file_path.read_text().splitlines():
-        print(
-            f"WARNING: {file_path} does exists but without keys! No keys will be excluded."
-        )
-        return set()
-
-    return {
-        key.strip()
-        for key in Path(file_path).read_text().splitlines()
-        if not key.isspace()
-    }
-
-
-def generate_html(directory_path: Path, exclude_file: Path):
-    exclude_keys = load_excluded_keys(exclude_file)
-
-    accordion_items = []
-
-    for json_file in directory_path.glob("*.json"):
-        try:
-            data: dict = json.loads(json_file.read_text())
-        except json.JSONDecodeError as e:
-            print(f"Error decoding JSON from {json_file}", e)
-            continue
-
-        if int(data.get("totalCount", 0)):
-            rows: list[dict] = []
-
-            for item in data.get("imdata", []):
-                attributes: dict[str, str] = item.get(json_file.stem, {}).get(
-                    "attributes", {}
-                )
-                valuable_attrs = {
-                    k: v
-                    for k, v in attributes.items()
-                    if v and not v.isspace() and k not in exclude_keys
-                }
-
-                if valuable_attrs:
-                    rows.append(valuable_attrs)
-
-            if rows:
-                headers = rows[0].keys()
-                table_headers = "".join(f"{header}" for header in headers)
-                table_rows = "".join(
-                    f"{''.join(f'{row.get(header, '')}' for header in headers)}"
-                    for row in rows
-                )
-
-                accordion_items.append(
-                    {
-                        "title": json_file.stem,
-                        "headers": table_headers,
-                        "rows": table_rows,
-                    }
-                )
-
-    env = Environment(loader=FileSystemLoader(Path(__file__).parent / "templates"))
-    template = env.get_template("vetr-data.j2")
-    output_html = template.render(accordion_items=accordion_items)
-
-    output_file = Path.cwd() / "vetr-summary.html"
-    output_file.write_text(output_html)
-
-    return output_file
diff --git a/src/vetr_summarizer/main.py b/src/vetr_summarizer/main.py
new file mode 100644
index 0000000..c720042
--- /dev/null
+++ b/src/vetr_summarizer/main.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+import json
+from pathlib import Path
+
+from jinja2 import Environment, FileSystemLoader
+
+from vetr_summarizer.config import Config
+
+
+class VetrSummarizer(object):
+    """Turn the JSON files of a directory into a single HTML report."""
+
+    def __init__(self, directory: Path, config: Config):
+        self.directory = directory
+        self.config = config
+        self.accordion_items = []
+
+    def load_json_files(self):
+        """Build one accordion item per ``*.json`` file that yields rows."""
+        # glob() order depends on the file system; sort for a stable report.
+        for json_file in sorted(self.directory.glob("*.json")):
+            try:
+                data = json.loads(json_file.read_text())
+            except json.JSONDecodeError as e:
+                # One malformed file must not abort the whole run.
+                print(f"Error decoding JSON from {json_file}", e)
+                continue
+
+            if int(data.get("totalCount", 0)):
+                key = json_file.stem
+                rows = self._process_json_data(data, key)
+                if rows:
+                    self._add_accordion_item(key, rows)
+
+    def _process_json_data(self, data: dict, key: str):
+        """Return the non-empty, non-excluded attributes of each ``imdata`` item."""
+        rows = []
+        for item in data.get("imdata", []):
+            attributes = item.get(key, {}).get("attributes", {})
+            valuable_attrs = {
+                k: v
+                for k, v in attributes.items()
+                if v and not v.isspace() and k not in self.config.excluded_keys
+            }
+            if valuable_attrs:
+                rows.append(valuable_attrs)
+        return rows
+
+    def _add_accordion_item(self, title: str, rows: list[dict]):
+        """Append the headers and rows of one table to ``accordion_items``."""
+        # Columns come from the first row only; other rows are read by these keys.
+        # NOTE(review): no cell or row markup is emitted here - confirm that
+        # the template expects plain concatenated values.
+        headers = rows[0].keys()
+        table_headers = "".join(f"{header}" for header in headers)
+        table_rows = "".join(
+            "".join(f"{row.get(header, '')}" for header in headers) for row in rows
+        )
+        self.accordion_items.append(
+            {
+                "title": title,
+                "headers": table_headers,
+                "rows": table_rows,
+            }
+        )
+
+    def generate_report(self, output_file: Path = None):
+        """Render the collected items with the configured template.
+
+        The report goes to ``output_file`` when given, otherwise to
+        ``config.output_html`` in the current working directory.
+        """
+        if not self.accordion_items:
+            print("WARNING: No data available to generate HTML report!")
+            return
+
+        environment = Environment(
+            loader=FileSystemLoader(Path(__file__).parent / "templates")
+        )
+        html_template = environment.get_template(self.config.template_file)
+
+        # Honor an explicit destination instead of always overwriting it.
+        if output_file is None:
+            output_file = Path.cwd() / self.config.output_html
+        output_file = Path(output_file)
+        output_file.write_text(
+            html_template.render(accordion_items=self.accordion_items)
+        )
+        print(f"HTML report is written to {output_file}")
+
+    def summarize(self):
+        """Load the JSON files, then write the report."""
+        self.load_json_files()
+        self.generate_report()