From 3c4cbb12f8a1c7ead5fc5cfe681d5154a6cc8520 Mon Sep 17 00:00:00 2001 From: Artyom Semidolin <43622365+Artanias@users.noreply.github.com> Date: Tue, 25 Feb 2025 23:50:14 +0300 Subject: [PATCH] refactor: bumps versions of pyright, requirements and ruff, also fixes type checker and ruff errors. (#217) --- .github/workflows/check_n_push_image.yml | 2 +- .pre-commit-config.yaml | 4 +- Makefile | 4 +- README.md | 2 +- docker/test_ubuntu2204.dockerfile.in | 2 +- docs/notebooks/requirements.txt | 8 +-- docs/notebooks/utils.py | 54 +++++++++++++------ locales/codeplag.pot | 18 +++---- .../translations/en/LC_MESSAGES/codeplag.po | 16 +++--- .../translations/ru/LC_MESSAGES/codeplag.po | 16 +++--- setup.py | 24 ++++----- src/codeplag/algorithms/stringbased.py | 10 ++-- src/codeplag/codeplagcli.py | 5 +- src/codeplag/cplag/const.py | 10 ++-- src/codeplag/cplag/tree.py | 6 +-- src/codeplag/display.py | 5 +- src/codeplag/handlers/check.py | 6 ++- src/codeplag/handlers/settings.py | 3 +- src/codeplag/pyplag/astwalkers.py | 4 +- src/codeplag/reporters.py | 7 +-- src/webparsers/async_github_parser.py | 5 +- src/webparsers/github_parser.py | 5 +- test/misc/test_makefile.py | 6 +-- 23 files changed, 125 insertions(+), 97 deletions(-) diff --git a/.github/workflows/check_n_push_image.yml b/.github/workflows/check_n_push_image.yml index 6274ffe..7932121 100644 --- a/.github/workflows/check_n_push_image.yml +++ b/.github/workflows/check_n_push_image.yml @@ -34,7 +34,7 @@ jobs: pip install $(python3 setup.py --install-requirements) pip install $(python3 setup.py --build-requirements) pip install --requirement docs/notebooks/requirements.txt - pip install pre-commit==3.4.0 + pip install pre-commit==4.1.0 make pre-commit docker-build-test-autotest: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0cd6c94..c90343c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_language_version: python: python3.10 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.4 + rev: v0.9.7 hooks: - id: ruff args: [ --fix ] @@ -15,4 +15,4 @@ repos: language: node pass_filenames: false types: [ python ] - additional_dependencies: [ 'pyright@1.1.305' ] + additional_dependencies: [ 'pyright@1.1.394' ] diff --git a/Makefile b/Makefile index 4cf31da..79701d7 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ -UTIL_VERSION := 0.5.12 +UTIL_VERSION := 0.5.13 UTIL_NAME := codeplag PWD := $(shell pwd) USER_UID ?= $(shell id --user) USER_GID ?= $(shell id --group) -BASE_DOCKER_VERSION := 1.0 +BASE_DOCKER_VERSION := 1.1 BASE_DOCKER_TAG := $(shell echo $(UTIL_NAME)-base-ubuntu22.04:$(BASE_DOCKER_VERSION) | tr A-Z a-z) TEST_DOCKER_TAG := $(shell echo $(UTIL_NAME)-test-ubuntu22.04:$(UTIL_VERSION) | tr A-Z a-z) DOCKER_TAG ?= $(shell echo $(UTIL_NAME)-ubuntu22.04:$(UTIL_VERSION) | tr A-Z a-z) diff --git a/README.md b/README.md index bd2866f..37d73dd 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ - Testing for analyzers with pytest lib (required preinstalled pytest framework). ``` - $ pip3 install pytest==7.4.0 pytest-mock==3.11.1 + $ pip3 install pytest==8.3.4 pytest-mock==3.14.0 $ make test ``` diff --git a/docker/test_ubuntu2204.dockerfile.in b/docker/test_ubuntu2204.dockerfile.in index a267ba4..acda41c 100644 --- a/docker/test_ubuntu2204.dockerfile.in +++ b/docker/test_ubuntu2204.dockerfile.in @@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update RUN apt-get install -y debhelper -RUN pip3 install pytest==7.4.0 pytest-mock==3.11.1 @PYTHON_BUILD_LIBS@ +RUN pip3 install pytest==8.3.4 pytest-mock==3.14.0 @PYTHON_BUILD_LIBS@ RUN mkdir -p @LOGS_PATH@ # TODO: Move to middle docker file or make another solution diff --git a/docs/notebooks/requirements.txt b/docs/notebooks/requirements.txt index 0e86839..26d30d4 100644 --- a/docs/notebooks/requirements.txt +++ b/docs/notebooks/requirements.txt @@ -1,5 +1,5 @@ -matplotlib~=3.7.3 -numpy~=1.23.5 -pandas~=2.0.3 +matplotlib~=3.10.0 +numpy~=1.26.4 +pandas~=2.2.3 python-decouple~=3.8 -scipy~=1.10.1 +scipy~=1.15.2 diff --git a/docs/notebooks/utils.py b/docs/notebooks/utils.py index b92ec64..db68095 100644 --- a/docs/notebooks/utils.py +++ b/docs/notebooks/utils.py @@ -1,5 +1,6 @@ import os import re +import sys from datetime import datetime from time import perf_counter from typing import Literal @@ -34,28 +35,32 @@ def remove_unnecessary_blank_lines(source_code: str) -> str: return re.sub(pattern, "\n", source_code) -def get_data_from_dir(path: str = "./data", max_count_lines: int | None = None) -> pd.DataFrame: +def get_data_from_dir( + path: str = "./data", max_count_lines: int | None = None +) -> pd.DataFrame | None: df = pd.DataFrame() for filename in os.listdir(path): if not re.search(r".csv$", filename): continue - tmp_df = pd.read_csv(os.path.join(path, filename), sep=";", index_col=0) + tmp_df = pd.read_csv(os.path.join(path, filename), sep=";", index_col=0) # type: ignore df = df.append(tmp_df, ignore_index=True) if max_count_lines: - return df[df.count_lines_without_blank_lines < max_count_lines] + result = df[df.count_lines_without_blank_lines < max_count_lines] + assert isinstance(result, pd.DataFrame) or result is None + return result return df -def save_works_from_repo_url(url: str, check_policy: bool = True) -> None: +def save_works_from_repo_url(url: str, check_policy: bool = True, min_lines: int = 5) -> None: current_repo_name = url.split("/")[-1] env_config = Config(RepositoryEnv("../../.env")) gh = GitHubParser( file_extensions=(re.compile(r".py$"),), check_all=check_policy, - access_token=env_config.get("ACCESS_TOKEN"), + access_token=env_config.get("ACCESS_TOKEN", default=""), # type: ignore ) files = list(gh.get_files_generator_from_repo_url(url)) files = [(remove_unnecessary_blank_lines(file.code), file.link) for file in files] @@ -76,22 +81,34 @@ def save_works_from_repo_url(url: str, check_policy: bool = True) -> None: ], } ) - df = df[df["count_lines_without_blank_lines"] > 5] + filtered_df = df["count_lines_without_blank_lines"] + assert filtered_df is not None + df = df[filtered_df > min_lines] + if df is None: + print(f"Nothing to save with minimal count of lines '{min_lines}'.", file=sys.stderr) + return df.to_csv(os.path.join("./data/", current_repo_name + ".csv"), sep=";") def get_time_to_meta(df: pd.DataFrame, iterations: int = 10) -> pd.DataFrame: count_lines = [] to_meta_time = [] - for index, content in df[["content", "link", "count_lines_without_blank_lines"]].iterrows(): + filtered_df = df[["content", "link", "count_lines_without_blank_lines"]] + if filtered_df is None: + raise Exception("DataFrame is empty, nothing to parse.") + for index, content in filtered_df.iterrows(): + code = content[0] + filepath = content[1] + assert isinstance(code, str) + assert isinstance(filepath, str) print(index, " " * 20, end="\r") for _ in range(iterations): - tree = get_ast_from_content(content[0], content[1]) + tree = get_ast_from_content(code, filepath) if tree is None: break try: start = perf_counter() - get_features_from_ast(tree, content[1]) + get_features_from_ast(tree, filepath) end = perf_counter() - start to_meta_time.append(end) count_lines.append(content[2]) @@ -130,7 +147,7 @@ def plot_and_save_result( p = np.poly1d(z) plt.plot(unique_count_lines, p(unique_count_lines), "r--", label="Линейный тренд.") elif trend == "n^2": - popt_cons, _ = curve_fit( + popt_cons, _ = curve_fit( # type: ignore square_func, unique_count_lines, mean_times, @@ -144,7 +161,7 @@ def plot_and_save_result( label="Квадратичный тренд.", ) elif trend == "n^3": - popt_cons, _ = curve_fit( + popt_cons, _ = curve_fit( # type: ignore cube_func, unique_count_lines, mean_times, @@ -156,7 +173,7 @@ def plot_and_save_result( p = np.poly1d(popt_cons) plt.plot(unique_count_lines, p(unique_count_lines), "r--", label="Кубический тренд.") elif trend == "n^4": - popt_cons, _ = curve_fit( + popt_cons, _ = curve_fit( # type: ignore quart_func, unique_count_lines, mean_times, @@ -200,14 +217,21 @@ def get_time_algorithms( raise Exception("Unexpected error when parsing first work.") features1 = get_features_from_ast(tree1, work.link) - for index, content in df[["content", "link", "count_lines_without_blank_lines"]].iterrows(): + filtered_df = df[["content", "link", "count_lines_without_blank_lines"]] + if filtered_df is None: + raise Exception("DataFrame is empty, nothing to parse.") + for index, content in filtered_df.iterrows(): + code = content[0] + filepath = content[1] + assert isinstance(code, str) + assert isinstance(filepath, str) for _ in range(iterations): print(index, " " * 20, end="\r") - tree2 = get_ast_from_content(content[0], content[1]) + tree2 = get_ast_from_content(code, filepath) if tree2 is None: continue try: - features2 = get_features_from_ast(tree2, content[1]) + features2 = get_features_from_ast(tree2, filepath) except Exception: continue diff --git a/locales/codeplag.pot b/locales/codeplag.pot index 9d787ee..4cb0de1 100644 --- a/locales/codeplag.pot +++ b/locales/codeplag.pot @@ -5,8 +5,8 @@ #, fuzzy msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.12\n" -"POT-Creation-Date: 2025-01-03 14:06+0300\n" +"Project-Id-Version: codeplag 0.5.13\n" +"POT-Creation-Date: 2025-02-25 22:01+0300\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: Artyom Semidolin\n" "Language-Team: LANGUAGE \n" @@ -190,33 +190,33 @@ msgid "" "languages." msgstr "" -#: src/codeplag/codeplagcli.py:366 +#: src/codeplag/codeplagcli.py:365 msgid "Print current version number and exit." msgstr "" -#: src/codeplag/codeplagcli.py:372 +#: src/codeplag/codeplagcli.py:371 msgid "Commands help." msgstr "" -#: src/codeplag/codeplagcli.py:387 +#: src/codeplag/codeplagcli.py:386 msgid "No command is provided; please choose one from the available (--help)." msgstr "" -#: src/codeplag/codeplagcli.py:398 +#: src/codeplag/codeplagcli.py:397 msgid "There is nothing to modify; please provide at least one argument." msgstr "" -#: src/codeplag/codeplagcli.py:402 +#: src/codeplag/codeplagcli.py:401 msgid "The'repo-regexp' option requires the provided 'github-user' option." msgstr "" -#: src/codeplag/codeplagcli.py:410 +#: src/codeplag/codeplagcli.py:409 msgid "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-project-folder' options." msgstr "" -#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95 +#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95 msgid "All paths must be provided." msgstr "" diff --git a/locales/translations/en/LC_MESSAGES/codeplag.po b/locales/translations/en/LC_MESSAGES/codeplag.po index a244c48..95f8b3c 100644 --- a/locales/translations/en/LC_MESSAGES/codeplag.po +++ b/locales/translations/en/LC_MESSAGES/codeplag.po @@ -4,7 +4,7 @@ # msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.12\n" +"Project-Id-Version: codeplag 0.5.13\n" "POT-Creation-Date: 2024-05-21 09:28+0300\n" "PO-Revision-Date: 2024-05-16 19:15+0300\n" "Last-Translator: Artyom Semidolin\n" @@ -214,27 +214,27 @@ msgstr "" "Program help to find similar parts of source codes for the different " "languages." -#: src/codeplag/codeplagcli.py:366 +#: src/codeplag/codeplagcli.py:365 msgid "Print current version number and exit." msgstr "Print current version number and exit." -#: src/codeplag/codeplagcli.py:372 +#: src/codeplag/codeplagcli.py:371 msgid "Commands help." msgstr "Commands help." -#: src/codeplag/codeplagcli.py:387 +#: src/codeplag/codeplagcli.py:386 msgid "No command is provided; please choose one from the available (--help)." msgstr "No command is provided; please choose one from the available (--help)." -#: src/codeplag/codeplagcli.py:398 +#: src/codeplag/codeplagcli.py:397 msgid "There is nothing to modify; please provide at least one argument." msgstr "There is nothing to modify; please provide at least one argument." -#: src/codeplag/codeplagcli.py:402 +#: src/codeplag/codeplagcli.py:401 msgid "The'repo-regexp' option requires the provided 'github-user' option." msgstr "The'repo-regexp' option requires the provided 'github-user' option." -#: src/codeplag/codeplagcli.py:410 +#: src/codeplag/codeplagcli.py:409 msgid "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-project-folder' options." @@ -242,7 +242,7 @@ msgstr "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-project-folder' options." -#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95 +#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95 msgid "All paths must be provided." msgstr "All or none of the root paths must be specified." diff --git a/locales/translations/ru/LC_MESSAGES/codeplag.po b/locales/translations/ru/LC_MESSAGES/codeplag.po index 02b36dd..dd19af9 100644 --- a/locales/translations/ru/LC_MESSAGES/codeplag.po +++ b/locales/translations/ru/LC_MESSAGES/codeplag.po @@ -4,7 +4,7 @@ # msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.12\n" +"Project-Id-Version: codeplag 0.5.13\n" "POT-Creation-Date: 2024-05-21 09:28+0300\n" "PO-Revision-Date: 2024-05-11 12:05+0300\n" "Last-Translator: Artyom Semidolin\n" @@ -224,31 +224,31 @@ msgstr "" "Программа помогает находить схожие части исходных кодов для разных языков" " программирования." -#: src/codeplag/codeplagcli.py:366 +#: src/codeplag/codeplagcli.py:365 msgid "Print current version number and exit." msgstr "Выводит текущую версию программы." -#: src/codeplag/codeplagcli.py:372 +#: src/codeplag/codeplagcli.py:371 msgid "Commands help." msgstr "Справка по командам." -#: src/codeplag/codeplagcli.py:387 +#: src/codeplag/codeplagcli.py:386 msgid "No command is provided; please choose one from the available (--help)." msgstr "" "Ни одна из команд не выбрана, пожалуйста, выбери одну из доступных команд" " (--help)." -#: src/codeplag/codeplagcli.py:398 +#: src/codeplag/codeplagcli.py:397 msgid "There is nothing to modify; please provide at least one argument." msgstr "" "Нечего модифицировать, пожалуйста, выберите один из параметров для " "модификации." -#: src/codeplag/codeplagcli.py:402 +#: src/codeplag/codeplagcli.py:401 msgid "The'repo-regexp' option requires the provided 'github-user' option." msgstr "Аргумент 'repo-regexp' требует заданного параметра 'github-user'." -#: src/codeplag/codeplagcli.py:410 +#: src/codeplag/codeplagcli.py:409 msgid "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-project-folder' options." @@ -256,7 +256,7 @@ msgstr "" "Аргумент 'path-regexp' требует заданного параметра 'directories', " "'github-user' или 'github-project-folder'." -#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95 +#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95 msgid "All paths must be provided." msgstr "Необходимо указать все корневые пути или не указывать ни одного." diff --git a/setup.py b/setup.py index 474d5e5..ef76373 100644 --- a/setup.py +++ b/setup.py @@ -3,26 +3,26 @@ from pathlib import Path BUILD_REQUIREMENTS: tuple[str, ...] = ( - "argparse-manpage==3", - "Babel==2.15.0", - "Cython~=3.0.8", - "setuptools~=75.8.0", - "Jinja2~=3.1.2", + "argparse-manpage==4.6", + "Babel==2.17.0", + "Cython~=3.0.12", + "setuptools~=75.8.1", + "Jinja2~=3.1.5", ) INSTALL_REQUIREMENTS: tuple[str, ...] = ( - "argcomplete~=2.0.0", - "numpy~=1.23.5", - "pandas~=1.4.3", + "argcomplete~=3.5.3", + "numpy~=1.26.4", + "pandas~=2.2.3", "ccsyspath~=1.1.0", "clang~=14.0.6", "llvmlite~=0.42.0", "libclang~=14.0.6", "python-decouple~=3.8", - "requests~=2.31.0", - "typing-extensions~=4.3.0", + "requests~=2.32.3", + "typing-extensions~=4.12.2", "aiohttp~=3.9.3", - "Jinja2~=3.1.2", - "cachetools==5.3.1", + "Jinja2~=3.1.5", + "cachetools==5.5.2", "gidgethub~=5.3.0", ) UTIL_NAME = os.getenv("UTIL_NAME") diff --git a/src/codeplag/algorithms/stringbased.py b/src/codeplag/algorithms/stringbased.py index aff33c1..41cced1 100644 --- a/src/codeplag/algorithms/stringbased.py +++ b/src/codeplag/algorithms/stringbased.py @@ -32,10 +32,12 @@ def calculate_distance_matrix(self: Self) -> None: for column in np.arange(1, self.s2_length + 1): symbol1 = self.sequence1[row - 1] symbol2 = self.sequence2[column - 1] - minimum = min( - self.distance_matrix[row - 1][column] + 1, - self.distance_matrix[row][column - 1] + 1, - self.distance_matrix[row - 1][column - 1] + self.m(symbol1, symbol2), + minimum = np.min( + [ + self.distance_matrix[row - 1][column] + 1, + self.distance_matrix[row][column - 1] + 1, + self.distance_matrix[row - 1][column - 1] + self.m(symbol1, symbol2), + ] ) self.distance_matrix[row][column] = minimum diff --git a/src/codeplag/codeplagcli.py b/src/codeplag/codeplagcli.py index 28763a6..dc2955f 100644 --- a/src/codeplag/codeplagcli.py +++ b/src/codeplag/codeplagcli.py @@ -45,7 +45,7 @@ def __call__( raise argparse.ArgumentError( self, _( - "You cannot specify the same value multiple times. " "You provided '{values}'." + "You cannot specify the same value multiple times. You provided '{values}'." ).format(values=str_values), ) setattr(namespace, self.dest, values) @@ -356,8 +356,7 @@ def __init__(self: Self) -> None: prog=UTIL_NAME, formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=_( - "Program help to find similar parts of source " - "codes for the different languages." + "Program help to find similar parts of source codes for the different languages." ), ) self.add_argument( diff --git a/src/codeplag/cplag/const.py b/src/codeplag/cplag/const.py index 13f5f00..55864bb 100644 --- a/src/codeplag/cplag/const.py +++ b/src/codeplag/cplag/const.py @@ -11,12 +11,12 @@ def get_compile_args() -> list[str]: COMPILE_ARGS = get_compile_args() IGNORE = [ - CursorKind.PREPROCESSING_DIRECTIVE, + CursorKind.PREPROCESSING_DIRECTIVE, # type: ignore # CursorKind.MACRO_DEFINITION, - CursorKind.MACRO_INSTANTIATION, - CursorKind.INCLUSION_DIRECTIVE, - CursorKind.USING_DIRECTIVE, - CursorKind.NAMESPACE, + CursorKind.MACRO_INSTANTIATION, # type: ignore + CursorKind.INCLUSION_DIRECTIVE, # type: ignore + CursorKind.USING_DIRECTIVE, # type: ignore + CursorKind.NAMESPACE, # type: ignore ] # fmt: off OPERATORS = ( diff --git a/src/codeplag/cplag/tree.py b/src/codeplag/cplag/tree.py index 44d7b4c..f987b82 100644 --- a/src/codeplag/cplag/tree.py +++ b/src/codeplag/cplag/tree.py @@ -44,11 +44,11 @@ def generic_visit(node: Cursor, features: ASTFeatures, curr_depth: int = 0) -> N def get_features(tree: Cursor, filepath: Path | str = "") -> ASTFeatures: features = ASTFeatures(filepath or tree.displayname) for token in tree.get_tokens(): - if token.kind == TokenKind.PUNCTUATION and token.spelling in OPERATORS: + if token.kind == TokenKind.PUNCTUATION and token.spelling in OPERATORS: # type: ignore features.operators[token.spelling] += 1 - if token.kind == TokenKind.KEYWORD: + if token.kind == TokenKind.KEYWORD: # type: ignore features.keywords[token.spelling] += 1 - if token.kind == TokenKind.LITERAL: + if token.kind == TokenKind.LITERAL: # type: ignore features.literals[token.spelling] += 1 generic_visit(tree, features) diff --git a/src/codeplag/display.py b/src/codeplag/display.py index 9b85707..ff6f5a3 100644 --- a/src/codeplag/display.py +++ b/src/codeplag/display.py @@ -4,6 +4,7 @@ from time import monotonic from typing import Final +import numpy as np import pandas as pd from typing_extensions import Self @@ -183,7 +184,7 @@ def print_compare_result( print("May be similar:", message, end="\n\n", sep="\n") main_metrics_df = pd.DataFrame( [compare_info.fast], - index=["Similarity"], + index=np.array(["Similarity"]), columns=pd.Index( (field.upper() for field in compare_info.fast._fields), name="FastMetrics:" ), @@ -196,7 +197,7 @@ def print_compare_result( additional_metrics_df = pd.DataFrame( compare_info.structure.similarity, - index=["Similarity"], + index=np.array(["Similarity"]), columns=pd.Index(["Structure"], name="AdditionalMetrics:"), ) print(additional_metrics_df) diff --git a/src/codeplag/handlers/check.py b/src/codeplag/handlers/check.py index 0132d92..f7c3912 100644 --- a/src/codeplag/handlers/check.py +++ b/src/codeplag/handlers/check.py @@ -127,7 +127,7 @@ def set_github_parser(self: Self, all_branches: bool, environment: Path | None = """ if not environment: logger.warning( - "Env file not found or not a file. " "Trying to get token from environment." + "Env file not found or not a file. Trying to get token from environment." ) access_token: str = os.environ.get("ACCESS_TOKEN", "") else: @@ -398,7 +398,9 @@ def compliance_matrix_to_df( for row in range(compliance_matrix.shape[0]): for col in range(compliance_matrix.shape[1]): data[row][col] = compliance_matrix[row][col][0] / compliance_matrix[row][col][1] - compliance_matrix_df = pd.DataFrame(data=data, index=head_nodes1, columns=head_nodes2) + compliance_matrix_df = pd.DataFrame( + data=data, index=np.array(head_nodes1), columns=np.array(head_nodes2) + ) return compliance_matrix_df diff --git a/src/codeplag/handlers/settings.py b/src/codeplag/handlers/settings.py index 796d52b..4940835 100644 --- a/src/codeplag/handlers/settings.py +++ b/src/codeplag/handlers/settings.py @@ -1,6 +1,7 @@ from pathlib import Path from typing import Any +import numpy as np import pandas as pd from codeplag.config import read_settings_conf, write_settings_conf @@ -11,7 +12,7 @@ def settings_show() -> None: settings_config = read_settings_conf() table = pd.DataFrame( list(settings_config.values()), - index=settings_config.keys(), + index=np.array(settings_config), columns=pd.Index(["Value"], name="Key"), ) print(table) diff --git a/src/codeplag/pyplag/astwalkers.py b/src/codeplag/pyplag/astwalkers.py index a006e92..4b824b0 100644 --- a/src/codeplag/pyplag/astwalkers.py +++ b/src/codeplag/pyplag/astwalkers.py @@ -47,7 +47,7 @@ def add_node_to_structure(self: Self, node: ast.AST, node_name: str) -> None: actual_node_name = self.__get_actual_name_from_node(node) if actual_node_name is None: actual_node_name = node_name - self.features.head_nodes.append(f"{actual_node_name}[{node.lineno}]") + self.features.head_nodes.append(f"{actual_node_name}[{node.lineno}]") # type: ignore def generic_visit(self: Self, node: ast.AST) -> None: """Traverses, counts operators, keywords, and literals, and saves sequence of operators. @@ -62,7 +62,7 @@ def generic_visit(self: Self, node: ast.AST) -> None: self.features.tokens.append(TO_TOKEN[type_name]) if "lineno" in dir(node) and "col_offset" in dir(node): self.features.tokens_pos.append( - NodeCodePlace(lineno=node.lineno, col_offset=node.col_offset) + NodeCodePlace(lineno=node.lineno, col_offset=node.col_offset) # type: ignore ) else: self.features.tokens_pos.append(self.features.tokens_pos[-1]) diff --git a/src/codeplag/reporters.py b/src/codeplag/reporters.py index affb239..1915d90 100644 --- a/src/codeplag/reporters.py +++ b/src/codeplag/reporters.py @@ -1,4 +1,4 @@ -"""This module contains logic for saving a comparison result into JSON or CSV.""" +"""This module contains logic for saving a comparison result into CSV.""" import json from abc import ABC, abstractmethod @@ -41,7 +41,7 @@ def __init__(self: Self, reports: Path) -> None: if self.reports_path.is_file(): self.__df_report = read_df(self.reports_path) else: - self.__df_report = pd.DataFrame(columns=CSV_REPORT_COLUMNS, dtype=object) + self.__df_report = pd.DataFrame(columns=np.array(CSV_REPORT_COLUMNS), dtype=object) self.__csv_last_save = monotonic() def save_result( @@ -65,7 +65,8 @@ def save_result( (self.__df_report.first_path == str(first_work.filepath)) & (self.__df_report.second_path == str(second_work.filepath)) ] - self.__df_report.drop(cache_val.index, inplace=True) + if isinstance(cache_val, pd.DataFrame): + self.__df_report.drop(cache_val.index, inplace=True) # type: ignore self.__df_report = pd.concat( [ self.__df_report, diff --git a/src/webparsers/async_github_parser.py b/src/webparsers/async_github_parser.py index a20933a..ee31513 100644 --- a/src/webparsers/async_github_parser.py +++ b/src/webparsers/async_github_parser.py @@ -371,10 +371,9 @@ async def get_files_generator_from_dir_url( ) for node in response: - current_path = f'/{node["path"]}' + current_path = f"/{node['path']}" full_link = ( - f"{_GH_URL}{dir_url.owner}/{dir_url.repo}" - f"/tree/{dir_url.branch}/{current_path[2:]}" + f"{_GH_URL}{dir_url.owner}/{dir_url.repo}/tree/{dir_url.branch}/{current_path[2:]}" ) node_type = node["type"] if node_type == "dir": diff --git a/src/webparsers/github_parser.py b/src/webparsers/github_parser.py index 45e446f..acbe006 100644 --- a/src/webparsers/github_parser.py +++ b/src/webparsers/github_parser.py @@ -320,10 +320,9 @@ def get_files_generator_from_dir_url( response_json = self.send_get_request(api_url, params=params).json() for node in response_json: - current_path = f'/{node["path"]}' + current_path = f"/{node['path']}" full_link = ( - f"{_GH_URL}{dir_url.owner}/{dir_url.repo}" - f"/tree/{dir_url.branch}/{current_path[2:]}" + f"{_GH_URL}{dir_url.owner}/{dir_url.repo}/tree/{dir_url.branch}/{current_path[2:]}" ) node_type = node["type"] if node_type == "dir": diff --git a/test/misc/test_makefile.py b/test/misc/test_makefile.py index 9777afa..e93e009 100644 --- a/test/misc/test_makefile.py +++ b/test/misc/test_makefile.py @@ -46,9 +46,9 @@ def test_makefile_consist_help_msgs_for_all_targets(makefile_targets: set[str]): continue makefile_help_targets.append(target_match.group("target_name")) unique_makefile_help_targets = set(makefile_help_targets) - assert len(makefile_help_targets) == len( - unique_makefile_help_targets - ), "Some targets' help messages repeats." + assert len(makefile_help_targets) == len(unique_makefile_help_targets), ( + "Some targets' help messages repeats." + ) unique_makefile_help_targets -= MAKEFILE_HELP_TARGETS_IGNORE targets_without_help_message = makefile_targets - unique_makefile_help_targets targets_which_only_in_the_makehelp = unique_makefile_help_targets - makefile_targets