diff --git a/.copier-answers.yml b/.copier-answers.yml index a2d9e9f..53c3f62 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier; NEVER EDIT MANUALLY -_commit: d91bf92 +_commit: 1c41c93 _src_path: gh:scipp/copier_template description: A daemon that creates a raw dataset using scicat interface whenever a new file is written by a file-writer. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f88b98d..4442b1b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,15 +13,6 @@ repos: - id: trailing-whitespace args: [ --markdown-linebreak-ext=md ] exclude: '\.svg' - - repo: https://github.com/pycqa/isort - rev: 5.12.0 - hooks: - - id: isort - name: isort (python) - - repo: https://github.com/psf/black-pre-commit-mirror - rev: 23.11.0 - hooks: - - id: black - repo: https://github.com/kynan/nbstripout rev: 0.6.0 hooks: @@ -29,19 +20,14 @@ repos: types: [ "jupyter" ] args: [ "--drop-empty-cells", "--extra-keys 'metadata.language_info.version cell.metadata.jp-MarkdownHeadingCollapsed cell.metadata.pycharm'" ] - - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 - hooks: - - id: flake8 - types: ["python"] - additional_dependencies: ["flake8-bugbear==23.9.16"] - args: ["--max-line-length=88", "--extend-ignore=E203"] - - repo: https://github.com/pycqa/bandit - rev: 1.7.5 - hooks: - - id: bandit - additional_dependencies: ["bandit[toml]"] - args: ["-c", "pyproject.toml"] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.3 + hooks: + - id: ruff + args: [ --fix ] + types_or: [ python, pyi, jupyter ] + - id: ruff-format + types_or: [ python, pyi ] - repo: https://github.com/codespell-project/codespell rev: v2.2.6 hooks: diff --git a/pyproject.toml b/pyproject.toml index f3831d4..ce568d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,17 +67,43 @@ filterwarnings = [ "error", ] -[tool.bandit] -# Excluding tests because bandit doesn't like `assert`. -exclude_dirs = ["docs/conf.py", "tests"] +[tool.ruff] +line-length = 88 +extend-include = ["*.ipynb"] +extend-exclude = [ + ".*", "__pycache__", "build", "dist", "install", +] + +[tool.ruff.lint] +# See https://docs.astral.sh/ruff/rules/ +select = ["B", "C4", "DTZ", "E", "F", "G", "I", "PERF", "PGH", "PT", "PYI", "RUF", "S", "T20", "UP", "W"] +ignore = [ + # Conflict with ruff format, see + # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules + "COM812", "COM819", "D206", "D300", "E111", "E114", "E117", "ISC001", "ISC002", "Q000", "Q001", "Q002", "Q003", "W191", +] +fixable = ["I001", "B010"] +isort.known-first-party = ["scicat-filewriter-ingest"] +pydocstyle.convention = "numpy" -[tool.black] -skip-string-normalization = true +[tool.ruff.lint.per-file-ignores] +# those files have an increased risk of relying on import order +"__init__.py" = ["I"] +"tests/*" = [ + "S101", # asserts are fine in tests + "B018", # 'useless expressions' are ok because some tests just check for exceptions +] +"*.ipynb" = [ + "E501", # longer lines are sometimes more readable + "F403", # *-imports used with domain types + "F405", # linter may fail to find names because of *-imports + "I", # we don't collect imports at the top + "S101", # asserts are used for demonstration and are safe in notebooks + "T201", # printing is ok for demonstration purposes +] -[tool.isort] -skip_gitignore = true -profile = "black" -known_first_party = ["scicat-filewriter-ingest"] +[tool.ruff.format] +quote-style = "preserve" [tool.mypy] strict = true diff --git a/requirements/make_base.py b/requirements/make_base.py index 1e1f48e..68a17e8 100644 --- a/requirements/make_base.py +++ b/requirements/make_base.py @@ -1,7 +1,6 @@ import sys from argparse import ArgumentParser from pathlib import Path -from typing import List import tomli @@ -20,7 +19,7 @@ """ -def write_dependencies(dependency_name: str, dependencies: List[str]) -> None: +def write_dependencies(dependency_name: str, dependencies: list[str]) -> None: path = Path(f"{dependency_name}.in") if path.exists(): sections = path.read_text().split(CUSTOM_AUTO_SEPARATOR) diff --git a/src/background-ingestor.py b/src/background-ingestor.py index 79e8f7d..e30d1e6 100644 --- a/src/background-ingestor.py +++ b/src/background-ingestor.py @@ -2,6 +2,7 @@ # Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) import json import logging +import pathlib from collections.abc import Generator from contextlib import contextmanager @@ -56,17 +57,16 @@ def main() -> None: logger.info("Nexus file to be ingested : ") logger.info(nexus_file) - done_writing_message_file = ( + done_writing_message_file = pathlib.Path( arg_namespace.arg_namespace.done_writing_message_file ) logger.info("Done writing message file linked to nexus file : ") logger.info(done_writing_message_file) # open and read done writing message input file - with open(done_writing_message_file, 'r') as f: - done_writing_message = json.load(f) + done_writing_message = json.load(done_writing_message_file.open()) + logger.info(done_writing_message) - print(done_writing_message) # open nexus file # nxs = snx.File(nexus_file) diff --git a/src/scicat_configuration.py b/src/scicat_configuration.py index dd4718a..147b777 100644 --- a/src/scicat_configuration.py +++ b/src/scicat_configuration.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) import argparse +from collections.abc import Mapping from dataclasses import dataclass -from typing import Mapping, Optional def build_main_arg_parser() -> argparse.ArgumentParser: @@ -152,8 +152,8 @@ class RunOptions: log_message_prefix: str log_level: str check_by_job_id: bool - system_log_facility: Optional[str] = None - pyscicat: Optional[str] = None + system_log_facility: str | None = None + pyscicat: str | None = None graylog: bool = False @@ -220,10 +220,10 @@ def build_scicat_config(input_args: argparse.Namespace) -> ScicatConfig: ): config_dict = json.loads(config_file_path.read_text()) else: - config_dict = dict() + config_dict = {} # Overwrite deep-copied options with command line arguments - run_option_dict: dict = copy.deepcopy(config_dict.setdefault("options", dict())) + run_option_dict: dict = copy.deepcopy(config_dict.setdefault("options", {})) for arg_name, arg_value in vars(input_args).items(): if arg_value is not None: run_option_dict[arg_name] = arg_value @@ -236,6 +236,6 @@ def build_scicat_config(input_args: argparse.Namespace) -> ScicatConfig: return ScicatConfig( original_dict=MappingProxyType(config_dict), run_options=RunOptions(**run_option_dict), - kafka_options=kafkaOptions(**config_dict.setdefault("kafka", dict())), - graylog_options=GraylogOptions(**config_dict.setdefault("graylog", dict())), + kafka_options=kafkaOptions(**config_dict.setdefault("kafka", {})), + graylog_options=GraylogOptions(**config_dict.setdefault("graylog", {})), ) diff --git a/src/scicat_ingestor.py b/src/scicat_ingestor.py index e636b59..9b89aa9 100644 --- a/src/scicat_ingestor.py +++ b/src/scicat_ingestor.py @@ -1,5 +1,16 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) +# Copyright (c) 2024 Scicatproject contributors (https://github.com/ScicatProject) +# ruff: noqa: E402, F401 + +import importlib.metadata + +try: + __version__ = importlib.metadata.version(__package__ or __name__) +except importlib.metadata.PackageNotFoundError: + __version__ = "0.0.0" + +del importlib + import logging from collections.abc import Generator from contextlib import contextmanager diff --git a/src/scicat_kafka.py b/src/scicat_kafka.py index 5a5c2a7..6f74781 100644 --- a/src/scicat_kafka.py +++ b/src/scicat_kafka.py @@ -4,14 +4,13 @@ from collections.abc import Generator from confluent_kafka import Consumer +from scicat_configuration import kafkaOptions from streaming_data_types import deserialise_wrdn from streaming_data_types.finished_writing_wrdn import ( FILE_IDENTIFIER as WRDN_FILE_IDENTIFIER, ) from streaming_data_types.finished_writing_wrdn import WritingFinished -from scicat_configuration import kafkaOptions - def collect_consumer_options(options: kafkaOptions) -> dict: """Build a Kafka consumer and configure it according to the ``options``.""" @@ -55,7 +54,7 @@ def build_consumer(kafka_options: kafkaOptions, logger: logging.Logger) -> Consu return None kafka_topics = collect_kafka_topics(kafka_options) - logger.info(f"Subscribing to the following Kafka topics: {kafka_topics}") + logger.info("Subscribing to the following Kafka topics: %s", kafka_topics) consumer.subscribe(kafka_topics) return Consumer(consumer_options) @@ -66,7 +65,8 @@ def validate_consumer(consumer: Consumer, logger: logging.Logger) -> bool: except Exception as err: logger.error( "Kafka consumer could not be instantiated. " - f"Error message from kafka thread: \n{err}" + "Error message from kafka thread: \n%s", + err, ) return False else: diff --git a/src/scicat_logging.py b/src/scicat_logging.py index 2017fe0..ee9a35b 100644 --- a/src/scicat_logging.py +++ b/src/scicat_logging.py @@ -1,11 +1,10 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) +import datetime import logging import logging.handlers -from datetime import datetime import graypy - from scicat_configuration import ScicatConfig @@ -24,7 +23,9 @@ def build_logger(config: ScicatConfig) -> logging.Logger: if run_options.file_log: file_name_components = [run_options.log_filepath_prefix] if run_options.file_log_timestamp: - file_name_components.append(datetime.now().strftime('%Y%m%d%H%M%S%f')) + file_name_components.append( + datetime.datetime.now(datetime.UTC).strftime('%Y%m%d%H%M%S%f') + ) file_name_components.append('.log') file_name = '_'.join(file_name_components) diff --git a/src/scicat_metadata.py b/src/scicat_metadata.py index 298465c..baed194 100644 --- a/src/scicat_metadata.py +++ b/src/scicat_metadata.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) +from collections.abc import Callable from importlib.metadata import entry_points -from typing import Callable def load_metadata_extractors(extractor_name: str) -> Callable: diff --git a/tests/_scicat_ingestor.py b/tests/_scicat_ingestor.py index abebc25..af4470e 100644 --- a/tests/_scicat_ingestor.py +++ b/tests/_scicat_ingestor.py @@ -9,12 +9,11 @@ from time import sleep # Run the main function in a subprocess - process = subprocess.Popen( - [ - "scicat_ingestor", - *(sys.argv[1:] or ["--verbose", "-c", "resources/config.sample.json"]), - ] + command = ( + "scicat_ingestor", + *(sys.argv[1:] or ["--verbose", "-c", "resources/config.sample.json"]), ) + process = subprocess.Popen(command) # noqa: S603 # Send a SIGINT signal to the process after 5 seconds sleep(5) diff --git a/tests/test_logging.py b/tests/test_logging.py index b89129f..90e999e 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -1,14 +1,13 @@ import pathlib import pytest - from scicat_configuration import GraylogOptions, RunOptions, ScicatConfig, kafkaOptions -@pytest.fixture +@pytest.fixture() def scicat_config(tmp_path: pathlib.Path) -> ScicatConfig: return ScicatConfig( - original_dict=dict(), + original_dict={}, run_options=RunOptions( config_file='test', verbose=True, diff --git a/tests/test_metadata_extractor.py b/tests/test_metadata_extractor.py index 14d905c..1b25d31 100644 --- a/tests/test_metadata_extractor.py +++ b/tests/test_metadata_extractor.py @@ -1,10 +1,9 @@ import pytest - from scicat_metadata import load_metadata_extractors @pytest.mark.parametrize( - ["extractor_name", "expected_result"], [("max", 5), ("min", 1), ("mean", 3)] + ("extractor_name", "expected_result"), [("max", 5), ("min", 1), ("mean", 3)] ) def test_metadata_extractor(extractor_name: str, expected_result: int): """Test if the metadata extractor can be loaded.""" diff --git a/tests/test_scicat_configuration.py b/tests/test_scicat_configuration.py index 09fb5d2..dacadd0 100644 --- a/tests/test_scicat_configuration.py +++ b/tests/test_scicat_configuration.py @@ -3,11 +3,10 @@ import argparse import pytest - from scicat_configuration import ScicatConfig -@pytest.fixture +@pytest.fixture() def main_arg_parser() -> argparse.ArgumentParser: """Return the namespace of the main argument parser.""" from scicat_configuration import build_main_arg_parser @@ -34,7 +33,7 @@ def test_scicat_arg_parser_configuration_matches( # Parse the configuration file assert config_path.exists() config_from_file: dict = json.loads(config_path.read_text()) - main_options: dict = config_from_file.get('options', dict()) + main_options: dict = config_from_file.get('options', {}) # Check if all keys matches all_keys = set(config_from_args.keys()).union(main_options.keys()) @@ -52,7 +51,7 @@ def test_build_scicat_config_default(main_arg_parser: argparse.ArgumentParser) - assert scicat_config.run_options.config_file == 'config.20240405.json' -@pytest.fixture +@pytest.fixture() def scicat_config(main_arg_parser: argparse.ArgumentParser) -> ScicatConfig: from scicat_configuration import build_scicat_config diff --git a/tox.ini b/tox.ini index 965f349..c9be159 100644 --- a/tox.ini +++ b/tox.ini @@ -11,14 +11,14 @@ commands = scicat_ingestor --help # Minimal test of the script [testenv:nightly] deps = -r requirements/nightly.txt -commands = pytest +commands = pytest {posargs} [testenv:unpinned] description = Test with unpinned dependencies, as a user would install now. deps = -r requirements/basetest.txt scicat-filewriter-ingest -commands = pytest +commands = pytest {posargs} [testenv:docs] description = invoke sphinx-build to build the HTML docs