
Commit

Solved some parameters and variables type errors. Solved some "None has no _ attribute" errors.
mla2001 committed Oct 4, 2024
1 parent 3a13015 commit 8c34be4
Showing 7 changed files with 86 additions and 73 deletions.
14 changes: 7 additions & 7 deletions src/vtlengine/API/_InternalApi.py
@@ -1,7 +1,7 @@
import json
import os
from pathlib import Path
from typing import Union, Optional, Dict, List
from typing import Union, Optional, Dict, List, Any

import pandas as pd
from s3fs import S3FileSystem
@@ -155,8 +155,8 @@ def load_datasets(data_structure: Union[dict, Path, List[Union[dict, Path]]]):
return _load_datastructure_single(data_structure)


def load_datasets_with_data(data_structures: Union[dict, Path, List[Union[dict, Path]]],
datapoints: Optional[Union[dict, Path, List[Path]]] = None):
def load_datasets_with_data(data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
datapoints: Optional[Union[Dict[str, Any], Path, List[Path]]] = None):
"""
Loads the dataset structures and fills them with the data contained in the datapoints. Returns a dict with the
structure and a pandas dataframe.
@@ -187,7 +187,7 @@ def load_datasets_with_data(data_structures: Union[dict,
return datasets, dict_datapoints


def load_vtl(input: Union[str, Path]):
def load_vtl(input: Union[str, Path]) -> str:
"""
Reads the vtl expression.
@@ -219,7 +219,7 @@ def _load_single_value_domain(input: Path):
return {vd.name: vd}


def load_value_domains(input: Union[dict, Path]):
def load_value_domains(input: Union[Dict[str, Any], Path]):
"""
Loads the value domains.
@@ -245,8 +245,8 @@ def load_value_domains(input: Union[dict, Path]):
return _load_single_value_domain(input)


def load_external_routines(input: Union[dict, Path]) -> Optional[
Dict[str, ExternalRoutine]]:
def load_external_routines(input: Union[Dict[str, Any], Path]) -> Optional[
Union[Dict[str, ExternalRoutine], ExternalRoutine, str]]:
"""
Load the external routines.
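A minimal usage sketch of the two loaders retyped above, assuming a JSON structure file and a CSV of datapoints on disk; the file names are illustrative and not part of this commit:

from pathlib import Path

from vtlengine.API._InternalApi import load_datasets_with_data, load_vtl

# Hypothetical inputs: data_structures accepts Dict[str, Any], Path, or a list of either,
# and datapoints accepts Dict[str, Any], Path, or a list of Paths.
datasets, datapoints = load_datasets_with_data(
    data_structures=Path("data/DS_1_structure.json"),
    datapoints=Path("data/DS_1.csv"),
)
script = load_vtl(Path("scripts/example.vtl"))  # load_vtl is now annotated to return str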
12 changes: 6 additions & 6 deletions src/vtlengine/API/__init__.py
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Any, Union, List, Optional
from typing import Any, Union, List, Optional, Dict

import pandas as pd
from antlr4 import CommonTokenStream, InputStream
@@ -65,8 +65,8 @@ def create_ast(text: str) -> Start:


def semantic_analysis(script: Union[str, Path],
data_structures: Union[dict, Path, List[Union[dict, Path]]],
value_domains: Union[dict, Path] = None,
data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
value_domains: Union[Dict[str, Any], Path] = None,
external_routines: Union[str, Path] = None):
"""
Checks if the vtl operation can be done.To do that, it generates the AST with the vtl script
@@ -133,9 +133,9 @@ class takes all of this information and checks it with the ast generated to
return result


def run(script: Union[str, Path], data_structures: Union[dict, Path, List[Union[dict, Path]]],
datapoints: Union[dict, str, Path, List[Union[str, Path]]],
value_domains: Optional[Union[dict, Path]] = None, external_routines: Optional[Union[str, Path]] = None,
def run(script: Union[str, Path], data_structures: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
datapoints: Union[Dict[str, Any], str, Path, List[Union[str, Path]]],
value_domains: Optional[Union[Dict[str, Any], Path]] = None, external_routines: Optional[Union[str, Path]] = None,
time_period_output_format: str = "vtl",
return_only_persistent=False,
output_folder: Optional[Union[str, Path]] = None) -> Any:
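A usage sketch of the public entry points whose signatures are retyped above; the VTL script and file paths are illustrative placeholders, not taken from the repository:

from pathlib import Path

from vtlengine.API import run, semantic_analysis

script = "DS_r := DS_1 + 1;"                    # hypothetical VTL script
structures = Path("data/DS_1_structure.json")   # a Dict[str, Any] would satisfy the new annotation too

semantic_analysis(script=script, data_structures=structures)
result = run(script=script,
             data_structures=structures,
             datapoints=Path("data/DS_1.csv"),  # Dict[str, Any], str, Path, or a list of them
             return_only_persistent=True)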
3 changes: 3 additions & 0 deletions src/vtlengine/DataTypes/__init__.py
@@ -34,6 +34,9 @@ class ScalarType:

default = None

def __name__(self) -> str:
return self.__class__.__name__

def __repr__(self) -> str:
return f"{self.__class__.__name__}"

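A standalone sketch (not repository code) of the pattern added to ScalarType: the class object already exposes __name__ as a built-in attribute, and the new method gives instances a callable equivalent:

class ScalarTypeSketch:
    def __name__(self) -> str:
        return self.__class__.__name__

class IntegerSketch(ScalarTypeSketch):
    pass

print(IntegerSketch.__name__)      # "IntegerSketch" (built-in class attribute)
print(IntegerSketch().__name__())  # "IntegerSketch" (via the method added in this commit)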
78 changes: 42 additions & 36 deletions src/vtlengine/Interpreter/__init__.py
@@ -74,8 +74,8 @@ class InterpreterAnalyzer(ASTTemplate):
else_condition_dataset: Optional[List[pd.DataFrame]] = None
ruleset_dataset: Optional[Dataset] = None
rule_data: Optional[pd.DataFrame] = None
ruleset_signature: Dict[str, str] = None
udo_params: List[Dict[str, Any]] = None
ruleset_signature: Optional[Dict[str, str]] = None
udo_params: Optional[List[Dict[str, Any]]] = None
hr_agg_rules_computed: Optional[Dict[str, pd.DataFrame]] = None
ruleset_mode: Optional[str] = None
hr_input: Optional[str] = None
@@ -337,7 +337,7 @@ def visit_Aggregation(self, node: AST.Aggregation) -> None:
operand = self.aggregation_dataset
elif self.is_from_regular_aggregation:
operand = self.regular_aggregation_dataset
if node.operand is not None:
if node.operand is not None and operand is not None:
op_comp: DataComponent = self.visit(node.operand)
comps_to_keep = {}
for comp_name, comp in self.regular_aggregation_dataset.components.items():
@@ -443,22 +443,23 @@ def visit_Analytic(self, node: AST.Analytic) -> None:
if node.operand is None:
operand = self.regular_aggregation_dataset
else:
operand_comp = self.visit(node.operand)
measure_names = self.regular_aggregation_dataset.get_measures_names()
dataset_components = self.regular_aggregation_dataset.components.copy()
for name in measure_names:
if name != operand_comp.name:
dataset_components.pop(name)

if self.only_semantic:
data = None
else:
data = self.regular_aggregation_dataset.data[
dataset_components.keys()]
if self.regular_aggregation_dataset is not None:
operand_comp = self.visit(node.operand)
measure_names = self.regular_aggregation_dataset.get_measures_names()
dataset_components = self.regular_aggregation_dataset.components.copy()
for name in measure_names:
if name != operand_comp.name:
dataset_components.pop(name)

if self.only_semantic:
data = None
else:
data = self.regular_aggregation_dataset.data[
dataset_components.keys()]

operand = Dataset(name=self.regular_aggregation_dataset.name,
components=dataset_components,
data=data)
operand = Dataset(name=self.regular_aggregation_dataset.name,
components=dataset_components,
data=data)

else:
operand: Dataset = self.visit(node.operand)
@@ -595,7 +596,7 @@ def visit_VarID(self, node: AST.VarID) -> Any:
return udo_element
# If it is only the component or dataset name, we rename the node.value
node.value = udo_element
if self.is_from_having or self.is_from_grouping:
if self.aggregation_dataset is not None and (self.is_from_having or self.is_from_grouping):
if node.value not in self.aggregation_dataset.components:
raise SemanticError("1-1-1-10", op=None, comp_name=node.value,
dataset_name=self.aggregation_dataset.name)
@@ -617,7 +618,7 @@ def visit_VarID(self, node: AST.VarID) -> Any:
raise SemanticError("1-1-6-11", comp_name=node.value)
return self.datasets[node.value]

if self.regular_aggregation_dataset.data is not None:
if self.regular_aggregation_dataset and self.regular_aggregation_dataset.data is not None:
if self.is_from_join and node.value not in self.regular_aggregation_dataset.get_components_names():
is_partial_present = 0
found_comp = None
@@ -641,7 +642,8 @@ def visit_VarID(self, node: AST.VarID) -> Any:
else:
data = None

return DataComponent(name=node.value,
if self.regular_aggregation_dataset is not None:
return DataComponent(name=node.value,
data=data,
data_type=
self.regular_aggregation_dataset.components[
Expand All @@ -650,7 +652,7 @@ def visit_VarID(self, node: AST.VarID) -> Any:
node.value].role,
nullable=self.regular_aggregation_dataset.components[
node.value].nullable)
if self.is_from_rule:
if self.is_from_rule and self.ruleset_dataset is not None:
if node.value not in self.ruleset_signature:
raise SemanticError("1-1-10-7", comp_name=node.value)
comp_name = self.ruleset_signature[node.value]
@@ -841,7 +843,8 @@ def visit_RenameNode(self, node: AST.RenameNode) -> Any:
if node.old_name in self.udo_params[-1]:
node.old_name = self.udo_params[-1][node.old_name]

if self.is_from_join and node.old_name not in self.regular_aggregation_dataset.components:
if (self.is_from_join and self.regular_aggregation_dataset is not None and
node.old_name not in self.regular_aggregation_dataset.components):
node.old_name = node.old_name.split('#')[1]

return node
@@ -899,19 +902,21 @@ def visit_ParamOp(self, node: AST.ParamOp) -> None:
else:
raise NotImplementedError
elif node.op == HAVING:
for id_name in self.aggregation_grouping:
if id_name not in self.aggregation_dataset.components:
raise SemanticError("1-1-2-4", op=node.op, id_name=id_name)
if len(self.aggregation_dataset.get_measures()) != 1:
raise ValueError("Only one measure is allowed")
if self.aggregation_dataset is not None:
for id_name in self.aggregation_grouping:
if id_name not in self.aggregation_dataset.components:
raise SemanticError("1-1-2-4", op=node.op, id_name=id_name)
if len(self.aggregation_dataset.get_measures()) != 1:
raise ValueError("Only one measure is allowed")
# Deepcopy is necessary for components to avoid changing the original dataset
self.aggregation_dataset.components = {comp_name: deepcopy(comp) for comp_name, comp in
self.aggregation_dataset.components = {comp_name: deepcopy(comp) for comp_name, comp in
self.aggregation_dataset.components.items()
if comp_name in self.aggregation_grouping
or comp.role == Role.MEASURE}
self.aggregation_dataset.data = self.aggregation_dataset.data[
self.aggregation_dataset.get_identifiers_names() +
self.aggregation_dataset.get_measures_names()]

self.aggregation_dataset.data = self.aggregation_dataset.data[
self.aggregation_dataset.get_identifiers_names() +
self.aggregation_dataset.get_measures_names()]
result = self.visit(node.params)
measure = result.get_measures()[0]
if measure.data_type != Boolean:
@@ -1081,10 +1086,11 @@ def visit_ParamOp(self, node: AST.ParamOp) -> None:

def visit_DPRule(self, node: AST.DPRule) -> None:
self.is_from_rule = True
if self.ruleset_dataset.data is None:
self.rule_data = None
else:
self.rule_data = self.ruleset_dataset.data.copy()
if self.ruleset_dataset is not None:
if self.ruleset_dataset.data is None:
self.rule_data = None
else:
self.rule_data = self.ruleset_dataset.data.copy()
validation_data = self.visit(node.rule)
if isinstance(validation_data, DataComponent):
if self.rule_data is not None:
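The Interpreter changes above follow a single pattern: attributes that default to None are declared Optional, and each access is preceded by an "is not None" guard so that neither mypy nor the runtime can hit a "None has no attribute" error. A standalone sketch of that pattern with invented names:

from dataclasses import dataclass
from typing import Any, Dict, List, Optional

@dataclass
class AnalyzerSketch:
    ruleset_signature: Optional[Dict[str, str]] = None
    udo_params: Optional[List[Dict[str, Any]]] = None

    def resolve(self, comp_name: str) -> str:
        # The guard narrows Optional[Dict[str, str]] to Dict[str, str] before indexing.
        if self.ruleset_signature is not None and comp_name in self.ruleset_signature:
            return self.ruleset_signature[comp_name]
        raise KeyError(comp_name)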
2 changes: 1 addition & 1 deletion src/vtlengine/Operators/Analytic.py
@@ -36,7 +36,7 @@ class Analytic(Operator.Unary):
analyticfunc: Specify class method that returns a dataframe using the duckdb library.
Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
"""
sql_op = None
sql_op: Optional[str] = None

@classmethod
def validate(cls, operand: Dataset,
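The base class leaves sql_op unset, so Optional[str] is the accurate annotation; concrete operators then assign the SQL window-function name that, per the docstring above, is used to build the duckdb query. A hypothetical subclass for illustration only:

from typing import Optional

class AnalyticSketch:
    sql_op: Optional[str] = None  # unset on the abstract base, hence Optional

class FirstValueSketch(AnalyticSketch):
    sql_op = "FIRST_VALUE"        # illustrative SQL function name, not from the repository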
41 changes: 21 additions & 20 deletions src/vtlengine/Operators/Join.py
@@ -193,26 +193,27 @@ def identifiers_validation(cls, operands: List[Dataset], using: List[str]) -> No
return

# (Case B1)
for op_name, identifiers in info.items():
if op_name != cls.reference_dataset.name and not set(identifiers).issubset(using):
raise SemanticError("1-1-13-4", op=cls.op, using_names=using, dataset=op_name)
reference_components = cls.reference_dataset.get_components_names()
if not set(using).issubset(reference_components):
raise SemanticError("1-1-13-6", op=cls.op, using_components=using,
reference=cls.reference_dataset.name)

for op_name, identifiers in info.items():
if not set(using).issubset(identifiers):
# (Case B2)
if not set(using).issubset(reference_components):
raise SemanticError("1-1-13-5", op=cls.op, using_names=using)
else:
for op in operands:
if op is not cls.reference_dataset:
for component in using:
if component not in op.get_components_names():
raise SemanticError("1-1-1-10", op=cls.op, comp_name=component,
dataset_name=op.name)
if cls.reference_dataset is not None:
for op_name, identifiers in info.items():
if op_name != cls.reference_dataset.name and not set(identifiers).issubset(using):
raise SemanticError("1-1-13-4", op=cls.op, using_names=using, dataset=op_name)
reference_components = cls.reference_dataset.get_components_names()
if not set(using).issubset(reference_components):
raise SemanticError("1-1-13-6", op=cls.op, using_components=using,
reference=cls.reference_dataset.name)

for op_name, identifiers in info.items():
if not set(using).issubset(identifiers):
# (Case B2)
if not set(using).issubset(reference_components):
raise SemanticError("1-1-13-5", op=cls.op, using_names=using)
else:
for op in operands:
if op is not cls.reference_dataset:
for component in using:
if component not in op.get_components_names():
raise SemanticError("1-1-1-10", op=cls.op, comp_name=component,
dataset_name=op.name)


class InnerJoin(Join):
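identifiers_validation reads reference_dataset, a class-level attribute that can still be None, so the Case B1/B2 checks are now nested under a guard. A simplified standalone sketch of that shape, with invented names:

from typing import List, Optional

class DatasetSketch:
    def __init__(self, name: str, components: List[str]) -> None:
        self.name = name
        self.components = components

class JoinSketch:
    reference_dataset: Optional[DatasetSketch] = None

    @classmethod
    def identifiers_validation(cls, using: List[str]) -> None:
        if cls.reference_dataset is not None:  # narrow the Optional before touching .components
            missing = set(using) - set(cls.reference_dataset.components)
            if missing:
                raise ValueError(f"using components missing from reference dataset: {missing}")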
9 changes: 6 additions & 3 deletions src/vtlengine/Operators/__init__.py
@@ -6,6 +6,8 @@
binary_implicit_promotion, check_binary_implicit_promotion, check_unary_implicit_promotion, \
unary_implicit_promotion, SCALAR_TYPES_CLASS_REVERSE
from vtlengine.DataTypes.TimeHandling import TimeIntervalHandler, TimePeriodHandler, DURATION_MAPPING
from vtlengine.DataTypes import Integer, Number, String, Boolean, Date, \
Duration, TimePeriod, TimeInterval, Null

from vtlengine.AST.Grammar.tokens import CEIL, FLOOR, ROUND, EQ, NEQ, GT, GTE, LT, LTE, XOR, OR, AND
from vtlengine.Exceptions import SemanticError
@@ -18,7 +20,8 @@
from vtlengine.Model import Component, Dataset, Role, Scalar, DataComponent, ScalarSet

ALL_MODEL_DATA_TYPES = Union[Dataset, Scalar, DataComponent]
ALL_DATA_TYPES = Union[ScalarType, Dataset, DataComponent, Scalar, ScalarSet]
ALL_SCALAR_TYPES = Union[type[Integer], type[Number], type[String], type[Boolean], type[Date],
type[Duration], type[TimePeriod], type[TimeInterval], type[Null]]

# This allows changing the data type of the Measure in the result Data Set
# when the operator is applied to mono-measure Data Sets.
@@ -35,8 +38,8 @@ class Operator:
op: str = None
py_op: str = None
spark_op: str = None
type_to_check: ScalarType = None
return_type: ScalarType = None
type_to_check: ALL_SCALAR_TYPES = None
return_type: ALL_SCALAR_TYPES = None

@classmethod
def analyze(cls, *args: Any, **kwargs: Any):
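ALL_SCALAR_TYPES is a Union over type[...] because operator classes store the scalar-type classes themselves, never instances. A minimal standalone sketch of the same idea (names invented; the type[...] syntax needs Python 3.9+):

from typing import Optional, Union

class NumberSketch: ...
class BooleanSketch: ...

SCALAR_CLASS = Union[type[NumberSketch], type[BooleanSketch]]

class ComparisonSketch:
    type_to_check: Optional[SCALAR_CLASS] = None
    return_type: Optional[SCALAR_CLASS] = BooleanSketch  # the class object, not an instance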
