From 4b33dd4829a8a942702bf4cdb28b80f053f22981 Mon Sep 17 00:00:00 2001
From: CBeck-96 <136458901+CBeck-96@users.noreply.github.com>
Date: Sat, 4 Jan 2025 20:51:04 +0100
Subject: [PATCH] refactor: configure mypy to strict mode (#339)

---
 .pre-commit-config.yaml          |  8 +++++++-
 cdxev/__main__.py                |  5 +++--
 cdxev/amend/operations.py        | 14 ++++++++++----
 cdxev/auxiliary/identity.py      |  4 ++--
 cdxev/auxiliary/sbomFunctions.py | 12 ++++++++++--
 cdxev/initialize_sbom.py         | 21 +++++++--------------
 cdxev/list_command.py            | 29 ++++++++++-------------------
 cdxev/merge.py                   |  2 +-
 cdxev/set.py                     |  6 +++---
 cdxev/validator/helper.py        | 20 +++++++++++++++++---
 cdxev/validator/validate.py      | 10 ++++++----
 pyproject.toml                   | 30 ++++++++++++++++++++++++++++--
 12 files changed, 104 insertions(+), 57 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4d9b3862..49d87c02 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,7 +23,7 @@ repos:
     hooks:
       - id: flake8
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: "v1.8.0"
+    rev: "v1.13.0"
     hooks:
       - id: mypy
         # List type stub dependencies explicitly, as --install-types should be avoided as per
@@ -32,7 +32,13 @@ repos:
           - types-python-dateutil==2.9.0.20241003
           - typing-extensions==4.12.2
           - types-jsonschema==4.23.0.20240813
+          - cyclonedx-python-lib==8.5.0
+          - univers==30.12.1
+          - charset-normalizer==3.4.0
+          - natsort==8.4.0
+          - docstring-parser==0.16
         files: "^cdxev/.*\\.py$"
+        args: ["--config-file", "pyproject.toml"]
   - repo: https://github.com/PyCQA/bandit
     rev: "1.7.7"
     hooks:
diff --git a/cdxev/__main__.py b/cdxev/__main__.py
index 0d0a6862..64dd04a4 100644
--- a/cdxev/__main__.py
+++ b/cdxev/__main__.py
@@ -9,6 +9,7 @@
 import shutil
 import sys
 import textwrap
+import typing as t
 from collections.abc import MutableSequence
 from dataclasses import dataclass
 from pathlib import Path
@@ -42,7 +43,7 @@ class Status(enum.IntEnum):
     VALIDATION_ERROR = 4
 
 
-def main() -> int:
+def main() -> t.Union[int, t.Any]:
     """Main entry point for this tool."""
     args = parse_cli()
 
@@ -93,7 +94,7 @@ def read_sbom(sbom_file: Path, file_type: Optional[str] = None) -> Tuple[dict, s
     return sbom, file_type
 
 
-def load_json(path: Path) -> dict:
+def load_json(path: Path) -> t.Any:
     """Loads a JSON file into a dictionary."""
     try:
         with path.open(encoding="utf-8-sig") as file:
diff --git a/cdxev/amend/operations.py b/cdxev/amend/operations.py
index 49f09539..bf2a72cb 100644
--- a/cdxev/amend/operations.py
+++ b/cdxev/amend/operations.py
@@ -312,9 +312,9 @@ class LicenseNameToId(Operation):
 
     license_map: dict[str, str] = {}
 
-    def prepare(self, sbom: dict) -> None:
+    def prepare(self, sbom: dict) -> None:  # type: ignore
         license_mapping_file = (
-            importlib.resources.files(__spec__.parent) / "license_name_spdx_id_map.json"  # type: ignore[name-defined, arg-type]  # noqa: E501
+            importlib.resources.files(__spec__.parent) / "license_name_spdx_id_map.json"  # type: ignore[arg-type]  # noqa: E501
         )
         license_mapping_json = license_mapping_file.read_text(encoding="utf-8-sig")
         license_mapping = json.loads(license_mapping_json)
@@ -446,10 +446,10 @@ class DeleteAmbiguousLicenses(Operation):
     """
 
     def _has_text(self, license: dict) -> bool:
-        return license.get("text", {}).get("content", "") != ""
+        return bool(license.get("text", {}).get("content", "") != "")
 
     def _has_url(self, license: dict) -> bool:
-        return license.get("url", "") != ""
+        return bool(license.get("url", "") != "")
 
     def _has_name_only(self, license: dict) -> bool:
         # Any fields other than name, text, or url mean the license shouldn't be deleted.
diff --git a/cdxev/auxiliary/identity.py b/cdxev/auxiliary/identity.py
index 70e95c4d..3069038f 100644
--- a/cdxev/auxiliary/identity.py
+++ b/cdxev/auxiliary/identity.py
@@ -79,13 +79,13 @@ class SWID(dict):
     """
 
     def __str__(self) -> str:
-        return "tagId: " + self["tagId"]
+        return "tagId: " + str(self["tagId"])
 
     def __eq__(self, other: object) -> bool:
         return isinstance(other, SWID) and self["tagId"] == other["tagId"]
 
     def __hash__(self) -> int:  # type: ignore[override]
-        return self["tagId"].__hash__()
+        return hash(self["tagId"])
 
 
 @dataclass(init=True, frozen=True, eq=True)
diff --git a/cdxev/auxiliary/sbomFunctions.py b/cdxev/auxiliary/sbomFunctions.py
index 6af48d00..3bba35c1 100644
--- a/cdxev/auxiliary/sbomFunctions.py
+++ b/cdxev/auxiliary/sbomFunctions.py
@@ -11,6 +11,8 @@
 from cyclonedx.model.component import Component
 from dateutil.parser import parse
 
+from cdxev.error import AppError
+
 logger = logging.getLogger(__name__)
 
 
@@ -415,8 +417,14 @@ def deserialize(sbom: dict) -> Bom:
         sbom.pop(
             "compositions"
         )  # compositions need to be removed till the model supports those
-    deserialized_bom = Bom.from_json(data=sbom)  # type: ignore
-    return deserialized_bom
+    deserialized_bom = Bom.from_json(data=sbom)  # type:ignore[attr-defined]
+    if isinstance(deserialized_bom, Bom):
+        return deserialized_bom
+    else:
+        raise AppError(
+            "Failed deserialization",
+            ("Deserialization of the SBOM into the CycloneDX Python Library failed."),
+        )
 
 
 def extract_cyclonedx_components(
diff --git a/cdxev/initialize_sbom.py b/cdxev/initialize_sbom.py
index a4622342..b484344f 100644
--- a/cdxev/initialize_sbom.py
+++ b/cdxev/initialize_sbom.py
@@ -5,21 +5,14 @@
 from typing import Any, Union
 from uuid import uuid4
 
-from cyclonedx.model import (  # type: ignore
-    ExternalReference,
-    ExternalReferenceType,
-    XsUri,
-)
-from cyclonedx.model.bom import Bom, BomMetaData  # type: ignore
-from cyclonedx.model.bom_ref import BomRef  # type: ignore
-from cyclonedx.model.component import Component, ComponentType  # type: ignore
-from cyclonedx.model.contact import (  # type: ignore
-    OrganizationalContact,
-    OrganizationalEntity,
-)
-from cyclonedx.model.dependency import Dependency  # type: ignore
+from cyclonedx.model import ExternalReference, ExternalReferenceType, XsUri
+from cyclonedx.model.bom import Bom, BomMetaData
+from cyclonedx.model.bom_ref import BomRef
+from cyclonedx.model.component import Component, ComponentType
+from cyclonedx.model.contact import OrganizationalContact, OrganizationalEntity
+from cyclonedx.model.dependency import Dependency
 from cyclonedx.model.tool import Tool
-from cyclonedx.output.json import JsonV1Dot6  # type: ignore
+from cyclonedx.output.json import JsonV1Dot6
 
 from cdxev import pkg
 
diff --git a/cdxev/list_command.py b/cdxev/list_command.py
index d055af35..ac9a848b 100644
--- a/cdxev/list_command.py
+++ b/cdxev/list_command.py
@@ -12,27 +12,18 @@
 logger = logging.getLogger(__name__)
 
 
-def print_license(license: dict) -> str:
-    if license.get("expression", ""):
-        return license.get("expression", "")
-    elif license.get("license", {}).get("id", ""):
-        return license.get("license", {}).get("id", "")
-    else:
-        return license.get("license", {}).get("name", "")
-
-
 def extract_string_from_license(license: License) -> str:
     if isinstance(license, DisjunctiveLicense):
         if license.id is not None:
-            return license.id
+            return str(license.id)
         elif license.name is not None:
-            return license.name
+            return str(license.name)
         else:
             return ""
 
     elif isinstance(license, LicenseExpression):
         if license.value is not None:
-            return license.value
+            return str(license.value)
         else:
             return ""
     else:
@@ -48,7 +39,7 @@ def extract_license_strings_from_licenses(licenses: list[License]) -> list[str]:
     return license_list
 
 
-def extract_metadata_license_information(metadata: BomMetaData) -> dict:
+def extract_metadata_license_information(metadata: BomMetaData) -> dict[str, Any]:
     if metadata.component is not None:
         metadata_component = metadata.component
         software_information: dict[str, Any] = {}
@@ -115,7 +106,7 @@ def write_list_to_str(str_list: list[str], division_character: str = "\n") -> st
     return string
 
 
-def write_license_dict_to_txt(info_dict: dict) -> str:
+def write_license_dict_to_txt(info_dict: dict[str, Any]) -> str:
     string = ""
 
     if info_dict.get("name", ""):
@@ -137,7 +128,7 @@ def write_license_dict_to_txt(info_dict: dict) -> str:
     return string
 
 
-def write_license_dict_to_csv(info_dict: dict) -> str:
+def write_license_dict_to_csv(info_dict: dict[str, Any]) -> str:
     string = ""
 
     string += '"' + info_dict.get("name", "") + '"'
@@ -153,7 +144,7 @@ def write_license_dict_to_csv(info_dict: dict) -> str:
 
 
 def write_license_information_to_txt(
-    software_information: dict, component_information: list[dict]
+    software_information: dict[str, Any], component_information: list[dict[str, Any]]
 ) -> str:
 
     string = write_license_dict_to_txt(software_information)
@@ -176,8 +167,8 @@ def write_license_information_to_txt(
 
 
 def write_license_information_to_csv(
-    software_information: dict,
-    component_information: list[dict],
+    software_information: dict[str, Any],
+    component_information: list[dict[str, Any]],
 ) -> str:
     string = "Name,Copyright,Licenses"
 
@@ -293,7 +284,7 @@ def list_components(sbom: Bom, format: str = "txt") -> str:
     return string
 
 
-def list_command(sbom: dict, operation: str, format: str = "txt") -> str:
+def list_command(sbom: dict, operation: str, format: str = "txt") -> str:  # type: ignore
     """
     Lists specific content of the SBOM.
 
diff --git a/cdxev/merge.py b/cdxev/merge.py
index 21fbae1e..3429f5c0 100644
--- a/cdxev/merge.py
+++ b/cdxev/merge.py
@@ -94,7 +94,7 @@ def merge_components(governing_sbom: dict, sbom_to_be_merged: dict) -> t.List[di
                 )
                 list_of_merged_components.append(component)
                 list_of_merged_bom_refs.append(new_bom_ref)
-    return list_of_merged_components
+    return list_of_merged_components  # type:ignore [no-any-return]
 
 
 def merge_dependency(
diff --git a/cdxev/set.py b/cdxev/set.py
index aa518143..e6127e85 100644
--- a/cdxev/set.py
+++ b/cdxev/set.py
@@ -6,8 +6,8 @@
 import typing as t
 from dataclasses import dataclass, field, fields
 
-import univers.version_range  # type:ignore
-import univers.versions  # type:ignore
+import univers.version_range  # type:ignore[import-untyped]
+import univers.versions  # type:ignore[import-untyped]
 
 from cdxev.auxiliary.identity import ComponentIdentity, Coordinates, Key, KeyType
 from cdxev.auxiliary.sbomFunctions import walk_components
@@ -150,7 +150,7 @@ def create(
                 group=component.get("group"),
                 version_range=component.get("version-range", ""),
             )
-            return UpdateIdentity(coordinates)  # type:ignore
+            return UpdateIdentity(coordinates)
 
         else:
             return super().create(component, allow_unsafe)
diff --git a/cdxev/validator/helper.py b/cdxev/validator/helper.py
index f0ff9c4a..4210af34 100644
--- a/cdxev/validator/helper.py
+++ b/cdxev/validator/helper.py
@@ -31,7 +31,7 @@ def open_schema(
                     "Path does not exist or is not a file: " + str(schema_path),
                 )
             with schema_path.open() as fp:
-                return json.load(fp)
+                return json.load(fp)  # type:ignore [no-any-return]
     except OSError as e:
         raise AppError("Schema not loaded", str(e)) from e
     except json.JSONDecodeError as e:
@@ -55,7 +55,14 @@ def _get_builtin_schema(schema_type: str, spec_version: str) -> dict:
             f"schema type '{schema_type}'.",
         )
     schema_json = schema_file.read_text()
-    return json.loads(schema_json)
+    schema = json.loads(schema_json)
+    if isinstance(schema, dict):
+        return schema
+    else:
+        raise AppError(
+            "Schema error",
+            ("Loaded builtin schema is not of type dict"),
+        )
 
 
 def load_spdx_schema() -> dict:
@@ -63,7 +70,14 @@ def load_spdx_schema() -> dict:
         resources.files("cdxev.auxiliary.schema") / "spdx.schema.json"
     )
     with path_to_embedded_schema.open() as f:
-        return json.load(f)
+        schema = json.load(f)
+        if isinstance(schema, dict):
+            return schema
+        else:
+            raise AppError(
+                "SPDX schema error",
+                ("Loaded SPDX schema is not type dict"),
+            )
 
 
 def validate_filename(
diff --git a/cdxev/validator/validate.py b/cdxev/validator/validate.py
index 305dc27e..ab3aae85 100644
--- a/cdxev/validator/validate.py
+++ b/cdxev/validator/validate.py
@@ -100,9 +100,11 @@ def validate_sbom(
         )
         for error in sorted(v.iter_errors(sbom), key=str):
             try:
-                if error.validator == "required" and error.validator_value == [
-                    "this_is_an_externally_described_component"
-                ]:
+                if (
+                    error.validator == "required"  # type: ignore[comparison-overlap]
+                    and error.validator_value
+                    == ["this_is_an_externally_described_component"]
+                ):
                     # This requirement in the schema allows us to produce warnings.
                     comp = t.cast(dict, error.instance)
                     if "bom-ref" in comp:
@@ -202,7 +204,7 @@ def validate_sbom(
                     errors.append(
                         f"{error_path}'{error.absolute_path[-1]}' should not be empty"
                     )
-                elif error.validator == "pattern":
+                elif error.validator == "pattern":  # type: ignore[comparison-overlap]
                     errors.append(error_path + error.message.replace("\\", ""))
                 else:
                     errors.append(error_path + error.message)
diff --git a/pyproject.toml b/pyproject.toml
index d066cf20..c3e1331b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,8 +73,34 @@ build_command = "pip install poetry && poetry build"
 packages = "cdxev"
 # Excludes tests even when mypy is invoked with a path (as the VS Code extension does, for instance)
 exclude = ['tests/']
-disallow_untyped_defs = true
-no_error_summary = true
+strict = true
+
+# Allow bare generic types (e.g. "dict" without type parameters) in these modules
+[[tool.mypy.overrides]]
+module = [
+    "cdxev.merge",
+    "cdxev.merge_vex",
+    "cdxev.set",
+    "cdxev.amend.operations",
+    "cdxev.amend.license",
+    "cdxev.amend.command",
+    "cdxev.__main__",
+    "cdxev.validator.helper",
+    "cdxev.validator.validate",
+    "cdxev.auxiliary.output",
+    "cdxev.auxiliary.sbomFunctions",
+    "cdxev.auxiliary.filename_gen",
+    "cdxev.auxiliary.identity",
+    "cdxev.validator.customreports",
+    "cdxev.build_public_bom"
+]
+disallow_any_generics = false
+
+[[tool.mypy.overrides]]
+module = [
+    "cdxev.__main__",
+]
+warn_return_any = false
 
 [tool.coverage.run]
 source = ["cdxev"]