Skip to content

Commit

Permalink
Fixed non mandatory missing components not autofilled with NaN on dat… (
Browse files Browse the repository at this point in the history
#32)

* Fixed non-mandatory missing components not being autofilled with NaN on datapoints.

* Fixed the error message for missing non-mandatory components. Added tests for missing non-mandatory components.

* Fixed flake errors.

* Re-added readme tests. Pyproject.toml version updated to 1.0.1.

* Refactored fill missing non mandatory components method.

* Refactored fill missing non mandatory components method.

---------

Co-authored-by: Francisco Javier Hernández del Caño <[email protected]>
  • Loading branch information
mla2001 and javihern98 authored Oct 17, 2024
1 parent f789e84 commit 3ab31bd
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "vtlengine"
version = "1.0"
version = "1.0.1"
description = "Run and Validate VTL Scripts"
authors = ["MeaningfulData <[email protected]>"]
license = "AGPL-3.0"
Expand Down
8 changes: 8 additions & 0 deletions src/vtlengine/files/parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,16 @@ def _validate_pandas(
) -> pd.DataFrame:
warnings.filterwarnings("ignore", category=FutureWarning)
# Identifier checking

id_names = [comp_name for comp_name, comp in components.items() if comp.role == Role.IDENTIFIER]

missing_columns = [name for name in components.keys() if name not in data.columns.tolist()]
if missing_columns:
for name in missing_columns:
if components[name].nullable is False:
raise SemanticError("0-1-1-10", name=dataset_name, comp_name=name)
data[name] = None

for id_name in id_names:
if data[id_name].isnull().any():
raise SemanticError("0-1-1-4", null_identifier=id_name, name=dataset_name)
Expand Down
205 changes: 205 additions & 0 deletions tests/API/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,47 @@ def test_run_only_persistent(script, data_structures, datapoints, value_domains,
assert result == reference


def test_readme_example():
    """README quick-start check: DS_A := DS_1 * 10 scales the measure column by 10."""
    script = """
    DS_A := DS_1 * 10;
    """

    data_structures = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
                ],
            }
        ]
    }

    datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2, 3], "Me_1": [10, 20, 30]})}

    run_result = run(script=script, data_structures=data_structures, datapoints=datapoints)

    # Build the expected dataset piecewise so each part is easy to inspect.
    expected_components = {
        "Id_1": Component(
            name="Id_1", data_type=DataTypes.Integer, role=Role.IDENTIFIER, nullable=False
        ),
        "Me_1": Component(
            name="Me_1", data_type=DataTypes.Number, role=Role.MEASURE, nullable=True
        ),
    }
    expected_data = pd.DataFrame(
        columns=["Id_1", "Me_1"], index=[0, 1, 2], data=[(1, 100), (2, 200), (3, 300)]
    )

    assert run_result == {
        "DS_A": Dataset(name="DS_A", components=expected_components, data=expected_data)
    }


def test_readme_run():
script = """
DS_A := DS_1 * 10;
Expand Down Expand Up @@ -519,3 +560,167 @@ def test_readme_semantic_error():
# Check output dataset on error message
with pytest.raises(SemanticError, match="DS_A"):
semantic_analysis(script=script, data_structures=data_structures)


def test_non_mandatory_fill_at():
    """A nullable attribute column absent from the datapoints must be autofilled with nulls.

    DS_1 is loaded without ``At_1``; since ``At_1`` is non-mandatory
    (nullable=True), the engine is expected to add it filled with null
    values rather than raising an error.
    """
    script = """
    DS_r := DS_1;
    """

    data_structures = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Id_2", "type": "String", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "String", "role": "Measure", "nullable": True},
                    {"name": "At_1", "type": "String", "role": "Attribute", "nullable": True},
                ],
            }
        ]
    }

    # Note: the "At_1" column is deliberately missing from the input.
    data_df = pd.DataFrame({"Id_1": [1, 1, 2], "Id_2": ['A', 'B', 'A'], "Me_1": ['N', 'N', 'O']})

    datapoints = {"DS_1": data_df}

    run_result = run(script=script, data_structures=data_structures, datapoints=datapoints)

    assert run_result == {
        "DS_r": Dataset(
            name="DS_r",
            components={
                "Id_1": Component(
                    name="Id_1", data_type=DataTypes.Integer, role=Role.IDENTIFIER, nullable=False
                ),
                "Id_2": Component(
                    name="Id_2", data_type=DataTypes.String, role=Role.IDENTIFIER, nullable=False
                ),
                "Me_1": Component(
                    name="Me_1", data_type=DataTypes.String, role=Role.MEASURE, nullable=True
                ),
                "At_1": Component(
                    name="At_1", data_type=DataTypes.String, role=Role.ATTRIBUTE, nullable=True
                ),
            },
            # Single constructor call instead of a redundant DataFrame nested
            # inside another DataFrame(columns=..., index=..., data=...).
            data=pd.DataFrame(
                {
                    "Id_1": [1, 1, 2],
                    "Id_2": ['A', 'B', 'A'],
                    "Me_1": ['N', 'N', 'O'],
                    "At_1": [None, None, None],
                }
            ),
        )
    }


def test_non_mandatory_fill_me():
    """A nullable measure column absent from the datapoints must be autofilled with nulls.

    DS_1 is loaded without ``Me_1``; since ``Me_1`` is non-mandatory
    (nullable=True), the engine is expected to add it filled with null
    values rather than raising an error.
    """
    script = """
    DS_r := DS_1;
    """

    data_structures = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Id_2", "type": "String", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "String", "role": "Measure", "nullable": True},
                    {"name": "At_1", "type": "String", "role": "Attribute", "nullable": True},
                ],
            }
        ]
    }

    # Note: the "Me_1" column is deliberately missing from the input.
    data_df = pd.DataFrame({"Id_1": [1, 1, 2], "Id_2": ['A', 'B', 'A'], "At_1": ['N', 'N', 'O']})

    datapoints = {"DS_1": data_df}

    run_result = run(script=script, data_structures=data_structures, datapoints=datapoints)

    assert run_result == {
        "DS_r": Dataset(
            name="DS_r",
            components={
                "Id_1": Component(
                    name="Id_1", data_type=DataTypes.Integer, role=Role.IDENTIFIER, nullable=False
                ),
                "Id_2": Component(
                    name="Id_2", data_type=DataTypes.String, role=Role.IDENTIFIER, nullable=False
                ),
                "Me_1": Component(
                    name="Me_1", data_type=DataTypes.String, role=Role.MEASURE, nullable=True
                ),
                "At_1": Component(
                    name="At_1", data_type=DataTypes.String, role=Role.ATTRIBUTE, nullable=True
                ),
            },
            # Single constructor call instead of a redundant DataFrame nested
            # inside another DataFrame(columns=..., index=..., data=...).
            data=pd.DataFrame(
                {
                    "Id_1": [1, 1, 2],
                    "Id_2": ['A', 'B', 'A'],
                    "Me_1": [None, None, None],
                    "At_1": ['N', 'N', 'O'],
                }
            ),
        )
    }


def test_mandatory_at_error():
    """A missing mandatory (non-nullable) attribute column must raise SemanticError 0-1-1-10."""
    exception_code = "0-1-1-10"

    script = """
    DS_r := DS_1;
    """

    data_structures = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Id_2", "type": "String", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "String", "role": "Measure", "nullable": True},
                    {"name": "At_1", "type": "String", "role": "Attribute", "nullable": False},
                ],
            }
        ]
    }

    # Note: the mandatory "At_1" column is deliberately missing from the input.
    data_df = pd.DataFrame({"Id_1": [1, 1, 2], "Id_2": ['A', 'B', 'A'], "Me_1": ['N', 'N', 'O']})

    datapoints = {"DS_1": data_df}

    with pytest.raises(SemanticError) as context:
        run(script=script, data_structures=data_structures, datapoints=datapoints)
    # Assert directly with a failure message instead of print-then-assert,
    # so the mismatch is shown in the assertion report itself.
    actual_code = str(context.value.args[1])
    assert actual_code == exception_code, f"{exception_code} != {actual_code}"


def test_mandatory_me_error():
    """A missing mandatory (non-nullable) measure column must raise SemanticError 0-1-1-10."""
    exception_code = "0-1-1-10"

    script = """
    DS_r := DS_1;
    """

    data_structures = {
        "datasets": [
            {
                "name": "DS_1",
                "DataStructure": [
                    {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                    {"name": "Id_2", "type": "String", "role": "Identifier", "nullable": False},
                    {"name": "Me_1", "type": "String", "role": "Measure", "nullable": False},
                    {"name": "At_1", "type": "String", "role": "Attribute", "nullable": True},
                ],
            }
        ]
    }

    # Note: the mandatory "Me_1" column is deliberately missing from the input.
    data_df = pd.DataFrame({"Id_1": [1, 1, 2], "Id_2": ['A', 'B', 'A'], "At_1": ['N', 'N', 'O']})

    datapoints = {"DS_1": data_df}

    with pytest.raises(SemanticError) as context:
        run(script=script, data_structures=data_structures, datapoints=datapoints)
    # Assert directly with a failure message instead of print-then-assert,
    # so the mismatch is shown in the assertion report itself.
    actual_code = str(context.value.args[1])
    assert actual_code == exception_code, f"{exception_code} != {actual_code}"

0 comments on commit 3ab31bd

Please sign in to comment.