
Commit

Solved some parameters and variables type errors. Solved some "None has no _ attribute" errors.
mla2001 committed Oct 4, 2024
1 parent 3a13015 commit 8c34be4
Showing 7 changed files with 86 additions and 73 deletions.
14 changes: 7 additions & 7 deletions src/vtlengine/API/_InternalApi.py
@@ -1,7 +1,7 @@
import json
import os
from pathlib import Path
from typing import Union, Optional, Dict, List
from typing import Union, Optional, Dict, List, Any

import pandas as pd
from s3fs import S3FileSystem
@@ -155,8 +155,8 @@ def load_datasets(data_structure: Union[dict, Path, List[Union[dict, Path]]]):
return _load_datastructure_single(data_structure)


def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict, Path]]],
datapoints: Optional[Union[dict, Path, List[Path]]] = None):
def load_datasets_with_data(data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
datapoints: Optional[Union[Dict[str, Any], Path, List[Path]]] = None):
"""
Loads the dataset structures and fills them with the data contained in the datapoints. Returns a dict with the
structure and a pandas dataframe.
@@ -187,7 +187,7 @@ def load_datasets_with_data(data_structures: Union[dict,
return datasets, dict_datapoints


def load_vtl(input: Union[str, Path]):
def load_vtl(input: Union[str, Path]) -> str:
"""
Reads the vtl expression.
@@ -219,7 +219,7 @@ def _load_single_value_domain(input: Path):
return {vd.name: vd}


def load_value_domains(input: Union[dict, Path]):
def load_value_domains(input: Union[Dict[str, Any], Path]):
"""
Loads the value domains.
@@ -245,8 +245,8 @@ def load_value_domains(input: Union[dict, Path]):
return _load_single_value_domain(input)


def load_external_routines(input: Union[dict, Path]) -> Optional[
Dict[str, ExternalRoutine]]:
def load_external_routines(input: Union[Dict[str, Any], Path]) -> Optional[
Union[Dict[str, ExternalRoutine], ExternalRoutine, str]]:
"""
Load the external routines.
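A minimal usage sketch of the two loaders retyped above, assuming a JSON structure file and a CSV of datapoints on disk; the file names are illustrative and not part of this commit:

from pathlib import Path

from vtlengine.API._InternalApi import load_datasets_with_data, load_vtl

# Hypothetical inputs: data_structures accepts Dict[str, Any], Path, or a list of either,
# and datapoints accepts Dict[str, Any], Path, or a list of Paths.
datasets, datapoints = load_datasets_with_data(
    data_structures=Path("data/DS_1_structure.json"),
    datapoints=Path("data/DS_1.csv"),
)
script = load_vtl(Path("scripts/example.vtl"))  # load_vtl is now annotated to return str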
12 changes: 6 additions & 6 deletions src/vtlengine/API/__init__.py
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Any, Union, List, Optional
from typing import Any, Union, List, Optional, Dict

import pandas as pd
from antlr4 import CommonTokenStream, InputStream
@@ -65,8 +65,8 @@ def create_ast(text: str) -> Start:


def semantic_analysis(script: Union[str, Path],
data_structures: Union[dict, Path, List[Union[dict, Path]]],
value_domains: Union[dict, Path] = None,
data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
value_domains: Union[Dict[str, Any], Path] = None,
external_routines: Union[str, Path] = None):
"""
Checks if the vtl operation can be done.To do that, it generates the AST with the vtl script
@@ -133,9 +133,9 @@ class takes all of this information and checks it with the ast generated to
return result


def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[dict, Path]]],
datapoints: Union[dict, str, Path, List[Union[str, Path]]],
value_domains: Optional[Union[dict, Path]] = None, external_routines: Optional[Union[str, Path]] = None,
def run(script: Union[str, Path], data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
datapoints: Union[Dict[str, Any], str, Path, List[Union[str, Path]]],
value_domains: Optional[Union[Dict[str, Any], Path]] = None, external_routines: Optional[Union[str, Path]] = None,
time_period_output_format: str = "vtl",
return_only_persistent=False,
output_folder: Optional[Union[str, Path]] = None) -> Any:
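A usage sketch of the public entry points whose signatures are retyped above; the VTL script and file paths are illustrative placeholders, not taken from the repository:

from pathlib import Path

from vtlengine.API import run, semantic_analysis

script = "DS_r := DS_1 + 1;"                    # hypothetical VTL script
structures = Path("data/DS_1_structure.json")   # a Dict[str, Any] would satisfy the new annotation too

semantic_analysis(script=script, data_structures=structures)
result = run(script=script,
             data_structures=structures,
             datapoints=Path("data/DS_1.csv"),  # Dict[str, Any], str, Path, or a list of them
             return_only_persistent=True)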
3 changes: 3 additions & 0 deletions src/vtlengine/DataTypes/__init__.py
@@ -34,6 +34,9 @@ class ScalarType:

default = None

def __name__(self) -> str:
return self.__class__.__name__

def __repr__(self) -> str:
return f"{self.__class__.__name__}"

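A standalone sketch (not repository code) of the pattern added to ScalarType: the class object already exposes __name__ as a built-in attribute, and the new method gives instances a callable equivalent:

class ScalarTypeSketch:
    def __name__(self) -> str:
        return self.__class__.__name__

class IntegerSketch(ScalarTypeSketch):
    pass

print(IntegerSketch.__name__)      # "IntegerSketch" (built-in class attribute)
print(IntegerSketch().__name__())  # "IntegerSketch" (via the method added in this commit)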
78 changes: 42 additions & 36 deletions src/vtlengine/Interpreter/__init__.py
@@ -74,8 +74,8 @@ class InterpreterAnalyzer(ASTTemplate):
else_condition_dataset: Optional[List[pd.DataFrame]] = None
ruleset_dataset: Optional[Dataset] = None
rule_data: Optional[pd.DataFrame] = None
ruleset_signature: Dict[str, str] = None
udo_params: List[Dict[str, Any]] = None
ruleset_signature: Optional[Dict[str, str]] = None
udo_params: Optional[List[Dict[str, Any]]] = None
hr_agg_rules_computed: Optional[Dict[str, pd.DataFrame]] = None
ruleset_mode: Optional[str] = None
hr_input: Optional[str] = None
@@ -337,7 +337,7 @@ def visit_Aggregation(self, node: AST.Aggregation) -> None:
operand = self.aggregation_dataset
elif self.is_from_regular_aggregation:
operand = self.regular_aggregation_dataset
if node.operand is not None:
if node.operand is not None and operand is not None:
op_comp: DataComponent = self.visit(node.operand)
comps_to_keep = {}
for comp_name, comp in self.regular_aggregation_dataset.components.items():
@@ -443,22 +443,23 @@ def visit_Analytic(self, node: AST.Analytic) -> None:
if node.operand is None:
operand = self.regular_aggregation_dataset
else:
operand_comp = self.visit(node.operand)
measure_names = self.regular_aggregation_dataset.get_measures_names()
dataset_components = self.regular_aggregation_dataset.components.copy()
for name in measure_names:
if name != operand_comp.name:
dataset_components.pop(name)

if self.only_semantic:
data = None
else:
data = self.regular_aggregation_dataset.data[
dataset_components.keys()]
if self.regular_aggregation_dataset is not None:
operand_comp = self.visit(node.operand)
measure_names = self.regular_aggregation_dataset.get_measures_names()
dataset_components = self.regular_aggregation_dataset.components.copy()
for name in measure_names:
if name != operand_comp.name:
dataset_components.pop(name)

if self.only_semantic:
data = None
else:
data = self.regular_aggregation_dataset.data[
dataset_components.keys()]

operand = Dataset(name=self.regular_aggregation_dataset.name,
components=dataset_components,
data=data)
operand = Dataset(name=self.regular_aggregation_dataset.name,
components=dataset_components,
data=data)

else:
operand: Dataset = self.visit(node.operand)
@@ -595,7 +596,7 @@ def visit_VarID(self, node: AST.VarID) -> Any:
return udo_element
# If it is only the component or dataset name, we rename the node.value
node.value = udo_element
if self.is_from_having or self.is_from_grouping:
if self.aggregation_dataset is not None and (self.is_from_having or self.is_from_grouping):
if node.value not in self.aggregation_dataset.components:
raise SemanticError("1-1-1-10", op=None, comp_name=node.value,
dataset_name=self.aggregation_dataset.name)
@@ -617,7 +618,7 @@ def visit_VarID(self, node: AST.VarID) -> Any:
raise SemanticError("1-1-6-11", comp_name=node.value)
return self.datasets[node.value]

if self.regular_aggregation_dataset.data is not None:
if self.regular_aggregation_dataset and self.regular_aggregation_dataset.data is not None:
if self.is_from_join and node.value not in self.regular_aggregation_dataset.get_components_names():
is_partial_present = 0
found_comp = None
@@ -641,7 +642,8 @@ def visit_VarID(self, node: AST.VarID) -> Any:
else:
data = None

return DataComponent(name=node.value,
if self.regular_aggregation_dataset is not None:
return DataComponent(name=node.value,
data=data,
data_type=
self.regular_aggregation_dataset.components[
Expand All @@ -650,7 +652,7 @@ def visit_VarID(self, node: AST.VarID) -> Any:
node.value].role,
nullable=self.regular_aggregation_dataset.components[
node.value].nullable)
if self.is_from_rule:
if self.is_from_rule and self.ruleset_dataset is not None:
if node.value not in self.ruleset_signature:
raise SemanticError("1-1-10-7", comp_name=node.value)
comp_name = self.ruleset_signature[node.value]
@@ -841,7 +843,8 @@ def visit_RenameNode(self, node: AST.RenameNode) -> Any:
if node.old_name in self.udo_params[-1]:
node.old_name = self.udo_params[-1][node.old_name]

if self.is_from_join and node.old_name not in self.regular_aggregation_dataset.components:
if (self.is_from_join and self.regular_aggregation_dataset is not None and
node.old_name not in self.regular_aggregation_dataset.components):
node.old_name = node.old_name.split('#')[1]

return node
@@ -899,19 +902,21 @@ def visit_ParamOp(self, node: AST.ParamOp) -> None:
else:
raise NotImplementedError
elif node.op == HAVING:
for id_name in self.aggregation_grouping:
if id_name not in self.aggregation_dataset.components:
raise SemanticError("1-1-2-4", op=node.op, id_name=id_name)
if len(self.aggregation_dataset.get_measures()) != 1:
raise ValueError("Only one measure is allowed")
if self.aggregation_dataset is not None:
for id_name in self.aggregation_grouping:
if id_name not in self.aggregation_dataset.components:
raise SemanticError("1-1-2-4", op=node.op, id_name=id_name)
if len(self.aggregation_dataset.get_measures()) != 1:
raise ValueError("Only one measure is allowed")
# Deepcopy is necessary for components to avoid changing the original dataset
self.aggregation_dataset.components = {comp_name: deepcopy(comp) for comp_name, comp in
self.aggregation_dataset.components = {comp_name: deepcopy(comp) for comp_name, comp in
self.aggregation_dataset.components.items()
if comp_name in self.aggregation_grouping
or comp.role == Role.MEASURE}
self.aggregation_dataset.data = self.aggregation_dataset.data[
self.aggregation_dataset.get_identifiers_names() +
self.aggregation_dataset.get_measures_names()]

self.aggregation_dataset.data = self.aggregation_dataset.data[
self.aggregation_dataset.get_identifiers_names() +
self.aggregation_dataset.get_measures_names()]
result = self.visit(node.params)
measure = result.get_measures()[0]
if measure.data_type != Boolean:
@@ -1081,10 +1086,11 @@ def visit_ParamOp(self, node: AST.ParamOp) -> None:

def visit_DPRule(self, node: AST.DPRule) -> None:
self.is_from_rule = True
if self.ruleset_dataset.data is None:
self.rule_data = None
else:
self.rule_data = self.ruleset_dataset.data.copy()
if self.ruleset_dataset is not None:
if self.ruleset_dataset.data is None:
self.rule_data = None
else:
self.rule_data = self.ruleset_dataset.data.copy()
validation_data = self.visit(node.rule)
if isinstance(validation_data, DataComponent):
if self.rule_data is not None:
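The Interpreter changes above follow a single pattern: attributes that default to None are declared Optional, and each access is preceded by an "is not None" guard so that neither mypy nor the runtime can hit a "None has no attribute" error. A standalone sketch of that pattern with invented names:

from dataclasses import dataclass
from typing import Any, Dict, List, Optional

@dataclass
class AnalyzerSketch:
    ruleset_signature: Optional[Dict[str, str]] = None
    udo_params: Optional[List[Dict[str, Any]]] = None

    def resolve(self, comp_name: str) -> str:
        # The guard narrows Optional[Dict[str, str]] to Dict[str, str] before indexing.
        if self.ruleset_signature is not None and comp_name in self.ruleset_signature:
            return self.ruleset_signature[comp_name]
        raise KeyError(comp_name)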
2 changes: 1 addition & 1 deletion src/vtlengine/Operators/Analytic.py
@@ -36,7 +36,7 @@ class Analytic(Operator.Unary):
analyticfunc: Specify class method that returns a dataframe using the duckdb library.
Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
"""
sql_op = None
sql_op: Optional[str] = None

@classmethod
def validate(cls, operand: Dataset,
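The base class leaves sql_op unset, so Optional[str] is the accurate annotation; concrete operators then assign the SQL window-function name that, per the docstring above, is used to build the duckdb query. A hypothetical subclass for illustration only:

from typing import Optional

class AnalyticSketch:
    sql_op: Optional[str] = None  # unset on the abstract base, hence Optional

class FirstValueSketch(AnalyticSketch):
    sql_op = "FIRST_VALUE"        # illustrative SQL function name, not from the repository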
41 changes: 21 additions & 20 deletions src/vtlengine/Operators/Join.py
@@ -193,26 +193,27 @@ def identifiers_validation(cls, operands: List[Dataset], using: List[str]) -> No
return

# (Case B1)
for op_name, identifiers in info.items():
if op_name != cls.reference_dataset.name and not set(identifiers).issubset(using):
raise SemanticError("1-1-13-4", op=cls.op, using_names=using, dataset=op_name)
reference_components = cls.reference_dataset.get_components_names()
if not set(using).issubset(reference_components):
raise SemanticError("1-1-13-6", op=cls.op, using_components=using,
reference=cls.reference_dataset.name)

for op_name, identifiers in info.items():
if not set(using).issubset(identifiers):
# (Case B2)
if not set(using).issubset(reference_components):
raise SemanticError("1-1-13-5", op=cls.op, using_names=using)
else:
for op in operands:
if op is not cls.reference_dataset:
for component in using:
if component not in op.get_components_names():
raise SemanticError("1-1-1-10", op=cls.op, comp_name=component,
dataset_name=op.name)
if cls.reference_dataset is not None:
for op_name, identifiers in info.items():
if op_name != cls.reference_dataset.name and not set(identifiers).issubset(using):
raise SemanticError("1-1-13-4", op=cls.op, using_names=using, dataset=op_name)
reference_components = cls.reference_dataset.get_components_names()
if not set(using).issubset(reference_components):
raise SemanticError("1-1-13-6", op=cls.op, using_components=using,
reference=cls.reference_dataset.name)

for op_name, identifiers in info.items():
if not set(using).issubset(identifiers):
# (Case B2)
if not set(using).issubset(reference_components):
raise SemanticError("1-1-13-5", op=cls.op, using_names=using)
else:
for op in operands:
if op is not cls.reference_dataset:
for component in using:
if component not in op.get_components_names():
raise SemanticError("1-1-1-10", op=cls.op, comp_name=component,
dataset_name=op.name)


class InnerJoin(Join):
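identifiers_validation reads reference_dataset, a class-level attribute that can still be None, so the Case B1/B2 checks are now nested under a guard. A simplified standalone sketch of that shape, with invented names:

from typing import List, Optional

class DatasetSketch:
    def __init__(self, name: str, components: List[str]) -> None:
        self.name = name
        self.components = components

class JoinSketch:
    reference_dataset: Optional[DatasetSketch] = None

    @classmethod
    def identifiers_validation(cls, using: List[str]) -> None:
        if cls.reference_dataset is not None:  # narrow the Optional before touching .components
            missing = set(using) - set(cls.reference_dataset.components)
            if missing:
                raise ValueError(f"using components missing from reference dataset: {missing}")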
9 changes: 6 additions & 3 deletions src/vtlengine/Operators/__init__.py
@@ -6,6 +6,8 @@
binary_implicit_promotion, check_binary_implicit_promotion, check_unary_implicit_promotion, \
unary_implicit_promotion, SCALAR_TYPES_CLASS_REVERSE
from vtlengine.DataTypes.TimeHandling import TimeIntervalHandler, TimePeriodHandler, DURATION_MAPPING
from vtlengine.DataTypes import Integer, Number, String, Boolean, Date, \
Duration, TimePeriod, TimeInterval, Null

from vtlengine.AST.Grammar.tokens import CEIL, FLOOR, ROUND, EQ, NEQ, GT, GTE, LT, LTE, XOR, OR, AND
from vtlengine.Exceptions import SemanticError
@@ -18,7 +20,8 @@
from vtlengine.Model import Component, Dataset, Role, Scalar, DataComponent, ScalarSet

ALL_MODEL_DATA_TYPES = Union[Dataset, Scalar, DataComponent]
ALL_DATA_TYPES = Union[ScalarType, Dataset, DataComponent, Scalar, ScalarSet]
ALL_SCALAR_TYPES = Union[type[Integer], type[Number], type[String], type[Boolean], type[Date],
type[Duration], type[TimePeriod], type[TimeInterval], type[Null]]

# This allows changing the data type of the Measure in the result Data Set
# when the operator is applied to mono-measure Data Sets.
@@ -35,8 +38,8 @@ class Operator:
op: str = None
py_op: str = None
spark_op: str = None
type_to_check: ScalarType = None
return_type: ScalarType = None
type_to_check: ALL_SCALAR_TYPES = None
return_type: ALL_SCALAR_TYPES = None

@classmethod
def analyze(cls, *args: Any, **kwargs: Any):
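ALL_SCALAR_TYPES is a Union over type[...] because operator classes store the scalar-type classes themselves, never instances. A minimal standalone sketch of the same idea (names invented; the type[...] syntax needs Python 3.9+):

from typing import Optional, Union

class NumberSketch: ...
class BooleanSketch: ...

SCALAR_CLASS = Union[type[NumberSketch], type[BooleanSketch]]

class ComparisonSketch:
    type_to_check: Optional[SCALAR_CLASS] = None
    return_type: Optional[SCALAR_CLASS] = BooleanSketch  # the class object, not an instance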
