Skip to content

Commit

Permalink
Prototype for a model-registration parser (#285)
Browse files Browse the repository at this point in the history
Co-authored-by: Philip Hackstock <[email protected]>
  • Loading branch information
danielhuppmann and phackstock authored Dec 12, 2023
1 parent 3460dd9 commit 44bc04e
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 8 deletions.
41 changes: 34 additions & 7 deletions nomenclature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@
from importlib.metadata import version
from pathlib import Path

import yaml
from setuptools_scm import get_version

from nomenclature.cli import cli # noqa
from nomenclature.codelist import CodeList # noqa
from nomenclature.core import process # noqa
from nomenclature.definition import SPECIAL_CODELIST, DataStructureDefinition # noqa
from nomenclature.processor import ( # noqa
RegionAggregationMapping,
RegionProcessor,
RequiredDataValidator,
)
from nomenclature.countries import countries # noqa
from nomenclature.definition import SPECIAL_CODELIST, DataStructureDefinition # noqa
from nomenclature.processor import RegionAggregationMapping # noqa
from nomenclature.processor import RegionProcessor, RequiredDataValidator

# set up logging
logging.basicConfig(
Expand All @@ -37,7 +35,7 @@ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None):
Parameters
----------
source : str, path, file-like object
Path to Excel file with definitions (codelists).
Path to xlsx file with definitions (codelists).
target : str, path, file-like object
Path to save the parsed definitions as yaml file.
sheet_name : str
Expand All @@ -52,3 +50,32 @@ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None):
SPECIAL_CODELIST.get(col.lower(), CodeList).read_excel(
name="", source=source, sheet_name=sheet_name, col=col, attrs=attrs
).to_yaml(target)


def parse_model_registration(model_registration_file):
    """Parse a model registration file and write the definitions & mapping yaml files

    The region-aggregation mapping is written to ``<name>_mapping.yaml`` and, if the
    mapping has native regions, their names (after renaming) are written to
    ``<name>_regions.yaml``. Both paths are relative, i.e. resolved against the
    current working directory. ``<name>`` is the first entry of the mapping's
    ``model`` attribute where every character that is neither alphanumeric nor one
    of ".", "_", "-", " " is replaced by "_".

    Parameters
    ----------
    model_registration_file : str or path
        Path to the model registration file, passed on to
        :meth:`RegionAggregationMapping.from_file`.
    """
    mapping = RegionAggregationMapping.from_file(model_registration_file)
    model_name = mapping.model[0]
    # sanitize the model name so that it can be used as part of a file name
    file_model_name = "".join(
        x if (x.isalnum() or x in "._- ") else "_" for x in model_name
    )
    mapping.to_yaml(f"{file_model_name}_mapping.yaml")
    # skip the region definitions file if the mapping has no native regions
    if native_regions := mapping.upload_native_regions:
        with open(f"{file_model_name}_regions.yaml", "w") as f:
            yaml.dump([{model_name: native_regions}], f)
97 changes: 96 additions & 1 deletion nomenclature/processor/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ def target_native_region(self) -> str:
"""
return self.rename if self.rename is not None else self.name

def __eq__(self, other: "NativeRegion") -> bool:
    """Compare with `other` using the equality check of the parent class."""
    is_equal = super().__eq__(other)
    return is_equal


class CommonRegion(BaseModel):
"""Common region used for model intercomparison.
Expand Down Expand Up @@ -88,6 +91,9 @@ def rename_dict(self):
"rename_dict is only available for single constituent regions"
)

def __eq__(self, other: "CommonRegion") -> bool:
    """Compare with `other` using the equality check of the parent class."""
    is_equal = super().__eq__(other)
    return is_equal


class RegionAggregationMapping(BaseModel):
"""Hold information for region processing on a per-model basis.
Expand Down Expand Up @@ -232,8 +238,20 @@ def from_file(cls, file: Union[Path, str]):
This function is used to convert a model mapping yaml file into a dictionary
which is used to initialize a RegionAggregationMapping.
"""

file = Path(file) if isinstance(file, str) else file
FILE_PARSERS = {
".yaml": cls.from_yaml,
".yml": cls.from_yaml,
".xlsx": cls.from_excel,
}
if file.suffix in FILE_PARSERS:
return FILE_PARSERS[file.suffix](file)
raise ValueError(f"No parser implemented for {file.suffix}")

@classmethod
def from_yaml(cls, file: Path) -> "RegionAggregationMapping":
try:
file = Path(file) if isinstance(file, str) else file
with open(file, "r") as f:
mapping_input = yaml.safe_load(f)

Expand Down Expand Up @@ -273,6 +291,54 @@ def from_file(cls, file: Union[Path, str]):
) from error
return cls(**mapping_input)

@classmethod
def from_excel(cls, file) -> "RegionAggregationMapping":
    """Initialize a RegionAggregationMapping from an xlsx model registration file

    The model name is read from the sheet "Model" and the native and common
    regions are read from the sheet "Common-Region-Mapping".

    Parameters
    ----------
    file : path
        Path to the xlsx file.

    Returns
    -------
    RegionAggregationMapping

    Raises
    ------
    RegionAggregationMappingParsingError
        If any error occurs while reading the file or building the regions.
    """
    native = "Native region (as reported by the model)"
    rename = "Native region (after renaming)"
    try:
        model = pd.read_excel(file, sheet_name="Model", usecols="B", nrows=1).iloc[
            0, 0
        ]

        regions = pd.read_excel(file, sheet_name="Common-Region-Mapping", header=3)
        # remove the unnamed columns and the first row below the header
        regions = regions.drop(
            columns=[c for c in regions.columns if c.startswith("Unnamed: ")]
        ).drop(index=0)
        # replace nan with None
        regions = regions.where(pd.notnull(regions), None)
        native_regions = [
            NativeRegion(name=row[native], rename=row[rename])
            for row in regions[[native, rename]].to_dict(orient="records")
        ]
        # every column other than the two native-region columns holds one
        # grouping; each distinct value in it is the name of a common region
        common_region_groups = [
            r for r in regions.columns if r not in (native, rename)
        ]
        # collect the constituents as a list (not a joined string) so that
        # region names containing a comma are kept intact
        common_regions = [
            CommonRegion(name=common_region, constituent_regions=constituent_regions)
            for common_region_group in common_region_groups
            for common_region, constituent_regions in regions[
                [native, common_region_group]
            ]
            .groupby(common_region_group)[native]
            .apply(list)
            .to_dict()
            .items()
        ]
    except Exception as error:
        raise RegionAggregationMappingParsingError(
            file=get_relative_path(file), error=str(error)
        ) from error
    return cls(
        model=model,
        file=file,
        native_regions=native_regions,
        common_regions=common_regions,
    )

@property
def all_regions(self) -> List[str]:
# For the native regions we take the **renamed** (if given) names
Expand All @@ -293,6 +359,13 @@ def common_region_names(self) -> List[str]:
def rename_mapping(self) -> Dict[str, str]:
    """Map the name of each native region to its target native region name."""
    mapping = {}
    for region in self.native_regions or []:
        mapping[region.name] = region.target_native_region
    return mapping

@property
def upload_native_regions(self) -> List[str]:
    """Target native region names of all native regions (empty list if none)."""
    names = []
    for region in self.native_regions or []:
        names.append(region.target_native_region)
    return names

@property
def reverse_rename_mapping(self) -> Dict[str, str]:
    """Inverse of `rename_mapping`, i.e. renamed region name -> original name."""
    reverse = {}
    for original, renamed in self.rename_mapping.items():
        reverse[renamed] = original
    return reverse
Expand Down Expand Up @@ -322,6 +395,28 @@ def check_unexpected_regions(self, df: IamDataFrame) -> None:
"mapping to silence this error."
)

def __eq__(self, other: "RegionAggregationMapping") -> bool:
    """Compare two mappings on all fields except `file`

    Returns ``NotImplemented`` if `other` is not a RegionAggregationMapping so
    that Python falls back to its default handling instead of this method
    raising an AttributeError on objects without a ``dict`` method.
    """
    if not isinstance(other, RegionAggregationMapping):
        return NotImplemented
    return self.dict(exclude={"file"}) == other.dict(exclude={"file"})

def to_yaml(self, file) -> None:
    """Write this mapping to `file` in yaml format

    The keys are written in the order "model", "native_regions",
    "common_regions", "exclude_regions"; each of the three region keys is
    omitted if the corresponding attribute is empty or None.

    Parameters
    ----------
    file : str or path
        Path of the yaml file to be written.
    """
    content = {"model": self.model}

    if self.native_regions:
        native_entries = []
        for region in self.native_regions:
            # a region with a rename is written as {name: rename}, otherwise
            # only the name is written
            if region.rename:
                native_entries.append({region.name: region.rename})
            else:
                native_entries.append(region.name)
        content["native_regions"] = native_entries

    if self.common_regions:
        common_entries = []
        for region in self.common_regions:
            common_entries.append({region.name: region.constituent_regions})
        content["common_regions"] = common_entries

    if self.exclude_regions:
        content["exclude_regions"] = self.exclude_regions

    # sort_keys=False keeps the insertion order of the keys above
    with open(file, "w") as stream:
        yaml.dump(content, stream, sort_keys=False)


class RegionProcessor(Processor):
"""Region aggregation mappings for scenario processing"""
Expand Down
15 changes: 15 additions & 0 deletions tests/data/region_aggregation/excel_mapping_reference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
model: Model 1.1
native_regions:
- Region 1: Model 1.1|Region 1
- Region 2
- Region 3: Model 1.1|Region 3
common_regions:
- Common Region 1:
- Region 1
- Region 2
- Common Region 2:
- Region 3
- World:
- Region 1
- Region 2
- Region 3
Binary file not shown.
39 changes: 39 additions & 0 deletions tests/test_region_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
process,
)
from nomenclature.error.region import RegionAggregationMappingParsingError
from nomenclature.processor.region import NativeRegion, CommonRegion
from pyam import IamDataFrame, assert_iamframe_equal
from pyam.utils import IAMC_IDX

Expand Down Expand Up @@ -275,3 +276,41 @@ def test_reverse_region_aggregation():
),
)
assert_iamframe_equal(obs, exp)


def test_model_mapping_from_excel():
    """Reading the xlsx model registration yields the expected mapping."""
    excel_file = TEST_DATA_DIR / "region_aggregation" / "excel_model_registration.xlsx"
    obs = RegionAggregationMapping.from_file(excel_file)

    model = "Model 1.1"
    expected_native = [
        NativeRegion(name="Region 1", rename=f"{model}|Region 1"),
        NativeRegion(name="Region 2"),
        NativeRegion(name="Region 3", rename=f"{model}|Region 3"),
    ]
    # common region name -> constituent native regions
    constituents = {
        "Common Region 1": ["Region 1", "Region 2"],
        "Common Region 2": ["Region 3"],
        "World": ["Region 1", "Region 2", "Region 3"],
    }
    expected_common = [
        CommonRegion(name=name, constituent_regions=members)
        for name, members in constituents.items()
    ]
    exp = RegionAggregationMapping(
        model=model,
        file=excel_file,
        native_regions=expected_native,
        common_regions=expected_common,
    )
    assert obs == exp


def test_model_mapping_from_excel_to_yaml(tmp_path):
    """An xlsx mapping written to yaml and read back equals the reference yaml."""
    data_dir = TEST_DATA_DIR / "region_aggregation"
    yaml_file = tmp_path / "mapping.yaml"

    # create a yaml mapping from an excel mapping
    excel_mapping = RegionAggregationMapping.from_file(
        data_dir / "excel_model_registration.xlsx"
    )
    excel_mapping.to_yaml(yaml_file)

    obs = RegionAggregationMapping.from_file(yaml_file)
    exp = RegionAggregationMapping.from_file(
        data_dir / "excel_mapping_reference.yaml"
    )
    assert obs == exp

0 comments on commit 44bc04e

Please sign in to comment.