Skip to content

Commit

Permalink
refactor: bumps versions of pyright, requirements and ruff, also fixe…
Browse files Browse the repository at this point in the history
…s type checker and ruff errors. (#217)
  • Loading branch information
Artanias authored Feb 25, 2025
1 parent 99e5442 commit 3c4cbb1
Show file tree
Hide file tree
Showing 23 changed files with 125 additions and 97 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check_n_push_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
pip install $(python3 setup.py --install-requirements)
pip install $(python3 setup.py --build-requirements)
pip install --requirement docs/notebooks/requirements.txt
pip install pre-commit==3.4.0
pip install pre-commit==4.1.0
make pre-commit
docker-build-test-autotest:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ default_language_version:
python: python3.10
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.4
rev: v0.9.7
hooks:
- id: ruff
args: [ --fix ]
Expand All @@ -15,4 +15,4 @@ repos:
language: node
pass_filenames: false
types: [ python ]
additional_dependencies: [ 'pyright@1.1.305' ]
additional_dependencies: [ 'pyright@1.1.394' ]
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
UTIL_VERSION := 0.5.12
UTIL_VERSION := 0.5.13
UTIL_NAME := codeplag
PWD := $(shell pwd)

USER_UID ?= $(shell id --user)
USER_GID ?= $(shell id --group)

BASE_DOCKER_VERSION := 1.0
BASE_DOCKER_VERSION := 1.1
BASE_DOCKER_TAG := $(shell echo $(UTIL_NAME)-base-ubuntu22.04:$(BASE_DOCKER_VERSION) | tr A-Z a-z)
TEST_DOCKER_TAG := $(shell echo $(UTIL_NAME)-test-ubuntu22.04:$(UTIL_VERSION) | tr A-Z a-z)
DOCKER_TAG ?= $(shell echo $(UTIL_NAME)-ubuntu22.04:$(UTIL_VERSION) | tr A-Z a-z)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@

- Testing for analyzers with pytest lib (required preinstalled pytest framework).
```
$ pip3 install pytest==7.4.0 pytest-mock==3.11.1
$ pip3 install pytest==8.3.4 pytest-mock==3.14.0
$ make test
```

Expand Down
2 changes: 1 addition & 1 deletion docker/test_ubuntu2204.dockerfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update
RUN apt-get install -y debhelper
RUN pip3 install pytest==7.4.0 pytest-mock==3.11.1 @PYTHON_BUILD_LIBS@
RUN pip3 install pytest==8.3.4 pytest-mock==3.14.0 @PYTHON_BUILD_LIBS@
RUN mkdir -p @LOGS_PATH@

# TODO: Move to middle docker file or make another solution
Expand Down
8 changes: 4 additions & 4 deletions docs/notebooks/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
matplotlib~=3.7.3
numpy~=1.23.5
pandas~=2.0.3
matplotlib~=3.10.0
numpy~=1.26.4
pandas~=2.2.3
python-decouple~=3.8
scipy~=1.10.1
scipy~=1.15.2
54 changes: 39 additions & 15 deletions docs/notebooks/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import re
import sys
from datetime import datetime
from time import perf_counter
from typing import Literal
Expand Down Expand Up @@ -34,28 +35,32 @@ def remove_unnecessary_blank_lines(source_code: str) -> str:
return re.sub(pattern, "\n", source_code)


def get_data_from_dir(path: str = "./data", max_count_lines: int | None = None) -> pd.DataFrame:
def get_data_from_dir(
path: str = "./data", max_count_lines: int | None = None
) -> pd.DataFrame | None:
df = pd.DataFrame()
for filename in os.listdir(path):
if not re.search(r".csv$", filename):
continue

tmp_df = pd.read_csv(os.path.join(path, filename), sep=";", index_col=0)
tmp_df = pd.read_csv(os.path.join(path, filename), sep=";", index_col=0) # type: ignore
df = df.append(tmp_df, ignore_index=True)

if max_count_lines:
return df[df.count_lines_without_blank_lines < max_count_lines]
result = df[df.count_lines_without_blank_lines < max_count_lines]
assert isinstance(result, pd.DataFrame) or result is None
return result

return df


def save_works_from_repo_url(url: str, check_policy: bool = True) -> None:
def save_works_from_repo_url(url: str, check_policy: bool = True, min_lines: int = 5) -> None:
current_repo_name = url.split("/")[-1]
env_config = Config(RepositoryEnv("../../.env"))
gh = GitHubParser(
file_extensions=(re.compile(r".py$"),),
check_all=check_policy,
access_token=env_config.get("ACCESS_TOKEN"),
access_token=env_config.get("ACCESS_TOKEN", default=""), # type: ignore
)
files = list(gh.get_files_generator_from_repo_url(url))
files = [(remove_unnecessary_blank_lines(file.code), file.link) for file in files]
Expand All @@ -76,22 +81,34 @@ def save_works_from_repo_url(url: str, check_policy: bool = True) -> None:
],
}
)
df = df[df["count_lines_without_blank_lines"] > 5]
filtered_df = df["count_lines_without_blank_lines"]
assert filtered_df is not None
df = df[filtered_df > min_lines]
if df is None:
print(f"Nothing to save with minimal count of lines '{min_lines}'.", file=sys.stderr)
return
df.to_csv(os.path.join("./data/", current_repo_name + ".csv"), sep=";")


def get_time_to_meta(df: pd.DataFrame, iterations: int = 10) -> pd.DataFrame:
count_lines = []
to_meta_time = []
for index, content in df[["content", "link", "count_lines_without_blank_lines"]].iterrows():
filtered_df = df[["content", "link", "count_lines_without_blank_lines"]]
if filtered_df is None:
raise Exception("DataFrame is empty, nothing to parse.")
for index, content in filtered_df.iterrows():
code = content[0]
filepath = content[1]
assert isinstance(code, str)
assert isinstance(filepath, str)
print(index, " " * 20, end="\r")
for _ in range(iterations):
tree = get_ast_from_content(content[0], content[1])
tree = get_ast_from_content(code, filepath)
if tree is None:
break
try:
start = perf_counter()
get_features_from_ast(tree, content[1])
get_features_from_ast(tree, filepath)
end = perf_counter() - start
to_meta_time.append(end)
count_lines.append(content[2])
Expand Down Expand Up @@ -130,7 +147,7 @@ def plot_and_save_result(
p = np.poly1d(z)
plt.plot(unique_count_lines, p(unique_count_lines), "r--", label="Линейный тренд.")
elif trend == "n^2":
popt_cons, _ = curve_fit(
popt_cons, _ = curve_fit( # type: ignore
square_func,
unique_count_lines,
mean_times,
Expand All @@ -144,7 +161,7 @@ def plot_and_save_result(
label="Квадратичный тренд.",
)
elif trend == "n^3":
popt_cons, _ = curve_fit(
popt_cons, _ = curve_fit( # type: ignore
cube_func,
unique_count_lines,
mean_times,
Expand All @@ -156,7 +173,7 @@ def plot_and_save_result(
p = np.poly1d(popt_cons)
plt.plot(unique_count_lines, p(unique_count_lines), "r--", label="Кубический тренд.")
elif trend == "n^4":
popt_cons, _ = curve_fit(
popt_cons, _ = curve_fit( # type: ignore
quart_func,
unique_count_lines,
mean_times,
Expand Down Expand Up @@ -200,14 +217,21 @@ def get_time_algorithms(
raise Exception("Unexpected error when parsing first work.")

features1 = get_features_from_ast(tree1, work.link)
for index, content in df[["content", "link", "count_lines_without_blank_lines"]].iterrows():
filtered_df = df[["content", "link", "count_lines_without_blank_lines"]]
if filtered_df is None:
raise Exception("DataFrame is empty, nothing to parse.")
for index, content in filtered_df.iterrows():
code = content[0]
filepath = content[1]
assert isinstance(code, str)
assert isinstance(filepath, str)
for _ in range(iterations):
print(index, " " * 20, end="\r")
tree2 = get_ast_from_content(content[0], content[1])
tree2 = get_ast_from_content(code, filepath)
if tree2 is None:
continue
try:
features2 = get_features_from_ast(tree2, content[1])
features2 = get_features_from_ast(tree2, filepath)
except Exception:
continue

Expand Down
18 changes: 9 additions & 9 deletions locales/codeplag.pot
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.5.12\n"
"POT-Creation-Date: 2025-01-03 14:06+0300\n"
"Project-Id-Version: codeplag 0.5.13\n"
"POT-Creation-Date: 2025-02-25 22:01+0300\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: Artyom Semidolin\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
Expand Down Expand Up @@ -190,33 +190,33 @@ msgid ""
"languages."
msgstr ""

#: src/codeplag/codeplagcli.py:366
#: src/codeplag/codeplagcli.py:365
msgid "Print current version number and exit."
msgstr ""

#: src/codeplag/codeplagcli.py:372
#: src/codeplag/codeplagcli.py:371
msgid "Commands help."
msgstr ""

#: src/codeplag/codeplagcli.py:387
#: src/codeplag/codeplagcli.py:386
msgid "No command is provided; please choose one from the available (--help)."
msgstr ""

#: src/codeplag/codeplagcli.py:398
#: src/codeplag/codeplagcli.py:397
msgid "There is nothing to modify; please provide at least one argument."
msgstr ""

#: src/codeplag/codeplagcli.py:402
#: src/codeplag/codeplagcli.py:401
msgid "The'repo-regexp' option requires the provided 'github-user' option."
msgstr ""

#: src/codeplag/codeplagcli.py:410
#: src/codeplag/codeplagcli.py:409
msgid ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-project-folder' options."
msgstr ""

#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95
#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95
msgid "All paths must be provided."
msgstr ""

Expand Down
16 changes: 8 additions & 8 deletions locales/translations/en/LC_MESSAGES/codeplag.po
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.5.12\n"
"Project-Id-Version: codeplag 0.5.13\n"
"POT-Creation-Date: 2024-05-21 09:28+0300\n"
"PO-Revision-Date: 2024-05-16 19:15+0300\n"
"Last-Translator: Artyom Semidolin\n"
Expand Down Expand Up @@ -214,35 +214,35 @@ msgstr ""
"Program help to find similar parts of source codes for the different "
"languages."

#: src/codeplag/codeplagcli.py:366
#: src/codeplag/codeplagcli.py:365
msgid "Print current version number and exit."
msgstr "Print current version number and exit."

#: src/codeplag/codeplagcli.py:372
#: src/codeplag/codeplagcli.py:371
msgid "Commands help."
msgstr "Commands help."

#: src/codeplag/codeplagcli.py:387
#: src/codeplag/codeplagcli.py:386
msgid "No command is provided; please choose one from the available (--help)."
msgstr "No command is provided; please choose one from the available (--help)."

#: src/codeplag/codeplagcli.py:398
#: src/codeplag/codeplagcli.py:397
msgid "There is nothing to modify; please provide at least one argument."
msgstr "There is nothing to modify; please provide at least one argument."

#: src/codeplag/codeplagcli.py:402
#: src/codeplag/codeplagcli.py:401
msgid "The'repo-regexp' option requires the provided 'github-user' option."
msgstr "The'repo-regexp' option requires the provided 'github-user' option."

#: src/codeplag/codeplagcli.py:410
#: src/codeplag/codeplagcli.py:409
msgid ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-project-folder' options."
msgstr ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-project-folder' options."

#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95
#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95
msgid "All paths must be provided."
msgstr "All or none of the root paths must be specified."

Expand Down
16 changes: 8 additions & 8 deletions locales/translations/ru/LC_MESSAGES/codeplag.po
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.5.12\n"
"Project-Id-Version: codeplag 0.5.13\n"
"POT-Creation-Date: 2024-05-21 09:28+0300\n"
"PO-Revision-Date: 2024-05-11 12:05+0300\n"
"Last-Translator: Artyom Semidolin\n"
Expand Down Expand Up @@ -224,39 +224,39 @@ msgstr ""
"Программа помогает находить схожие части исходных кодов для разных языков"
" программирования."

#: src/codeplag/codeplagcli.py:366
#: src/codeplag/codeplagcli.py:365
msgid "Print current version number and exit."
msgstr "Выводит текущую версию программы."

#: src/codeplag/codeplagcli.py:372
#: src/codeplag/codeplagcli.py:371
msgid "Commands help."
msgstr "Справка по командам."

#: src/codeplag/codeplagcli.py:387
#: src/codeplag/codeplagcli.py:386
msgid "No command is provided; please choose one from the available (--help)."
msgstr ""
"Ни одна из команд не выбрана, пожалуйста, выбери одну из доступных команд"
" (--help)."

#: src/codeplag/codeplagcli.py:398
#: src/codeplag/codeplagcli.py:397
msgid "There is nothing to modify; please provide at least one argument."
msgstr ""
"Нечего модифицировать, пожалуйста, выберите один из параметров для "
"модификации."

#: src/codeplag/codeplagcli.py:402
#: src/codeplag/codeplagcli.py:401
msgid "The'repo-regexp' option requires the provided 'github-user' option."
msgstr "Аргумент 'repo-regexp' требует заданного параметра 'github-user'."

#: src/codeplag/codeplagcli.py:410
#: src/codeplag/codeplagcli.py:409
msgid ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-project-folder' options."
msgstr ""
"Аргумент 'path-regexp' требует заданного параметра 'directories', "
"'github-user' или 'github-project-folder'."

#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95
#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95
msgid "All paths must be provided."
msgstr "Необходимо указать все корневые пути или не указывать ни одного."

Expand Down
Loading

0 comments on commit 3c4cbb1

Please sign in to comment.