forked from SocialFinanceDigitalLabs/liia-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.com/SocialFinanceDigitalLabs/li…
…ia-tools-pipeline into fix_missing_folder_error
- Loading branch information
Showing
16 changed files
with
982 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import logging | ||
from functools import lru_cache | ||
from pathlib import Path | ||
|
||
from pydantic_yaml import parse_yaml_file_as | ||
from ruamel.yaml import YAML | ||
|
||
yaml = YAML() | ||
yaml.preserve_quotes = True | ||
|
||
from liiatools.common.data import PipelineConfig | ||
from liiatools.common.spec.__data_schema import DataSchema | ||
|
||
# NOTE(review): Python only honours the lower-case name ``__all__`` when
# resolving ``from module import *``; this upper-case ``__ALL__`` is an
# ordinary variable and does not restrict the exported names. "Category" and
# "Column" are also not imported in the visible part of this module - confirm
# the intended public API before renaming.
__ALL__ = ["load_schema", "DataSchema", "Category", "Column"]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Directory containing this module; the loaders below read their files from here.
SCHEMA_DIR = Path(__file__).parent
|
||
|
||
@lru_cache
def load_pipeline_config():
    """
    Load the pipeline config file

    Reads ``pipeline.json`` from the directory containing this module and
    parses it into a ``PipelineConfig``. A successful result is memoised by
    ``lru_cache``, so later calls return the same object without re-reading
    the file.

    :return: Parsed pipeline config file
    """
    config_path = SCHEMA_DIR / "pipeline.json"

    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding.
    with open(config_path, "rt", encoding="utf-8") as f:
        return parse_yaml_file_as(PipelineConfig, f)
|
||
|
||
@lru_cache
def load_schema() -> DataSchema:
    """
    Load the data schema file

    Reads ``pnw_census_schema.yml`` from the directory containing this module
    and builds a ``DataSchema`` from its top-level mapping. A successful
    result is memoised by ``lru_cache``.

    :return: The data schema in a DataSchema class
    :raises ValueError: If the schema file does not exist
    """
    schema_path = SCHEMA_DIR / "pnw_census_schema.yml"

    # If we have no schema file, raise an error. A Path object is always
    # truthy, so the filesystem has to be checked explicitly.
    if not schema_path.is_file():
        raise ValueError(f"No schema files found: {schema_path}")

    with open(schema_path, "r", encoding="utf-8") as file:
        full_schema = yaml.load(file)

    # Now we can parse the full schema into a DataSchema object from the dict
    return DataSchema(**full_schema)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
{ | ||
"retention_columns": { | ||
"year_column": "Year", | ||
"la_column": "LA" | ||
}, | ||
"retention_period": { | ||
"PAN": 7 | ||
}, | ||
"la_signed": { | ||
"Bolton": { | ||
"PAN": "No" | ||
}, | ||
"Bury": { | ||
"PAN": "No" | ||
}, | ||
"Manchester": { | ||
"PAN": "No" | ||
}, | ||
"Oldham": { | ||
"PAN": "No" | ||
}, | ||
"Rochdale": { | ||
"PAN": "No" | ||
}, | ||
"Salford": { | ||
"PAN": "No" | ||
}, | ||
"Stockport": { | ||
"PAN": "No" | ||
}, | ||
"Tameside": { | ||
"PAN": "No" | ||
}, | ||
"Trafford": { | ||
"PAN": "No" | ||
}, | ||
"Wigan": { | ||
"PAN": "No" | ||
} | ||
}, | ||
"table_list": [ | ||
{ | ||
"id": "pnw_census", | ||
"retain": [ | ||
"PAN" | ||
], | ||
"columns": [ | ||
{ | ||
"id": "Looked after child?", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "Placing Authority", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "Identifier", | ||
"type": "alphanumeric", | ||
"unique_key": true | ||
}, | ||
{ | ||
"id": "Age", | ||
"type": "integer" | ||
}, | ||
{ | ||
"id": "Gender", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "Ethnic Group", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "How the placement was sourced", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "Placement start date", | ||
"type": "date", | ||
"sort": 0 | ||
}, | ||
{ | ||
"id": "Organisation", | ||
"type": "alphanumeric" | ||
}, | ||
{ | ||
"id": "Provider ID", | ||
"type": "alphanumeric" | ||
}, | ||
{ | ||
"id": "Establishment", | ||
"type": "alphanumeric" | ||
}, | ||
{ | ||
"id": "Registration type", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "Establishment registration URN", | ||
"type": "alphanumeric" | ||
}, | ||
{ | ||
"id": "Host Authority", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "Primary Placing at Distance Reason", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "Type of provision", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "UASC", | ||
"type": "category" | ||
}, | ||
{ | ||
"id": "Total weekly cost", | ||
"type": "float" | ||
}, | ||
{ | ||
"id": "Contribution from Social Care", | ||
"type": "float" | ||
}, | ||
{ | ||
"id": "Contribution from Education", | ||
"type": "float" | ||
}, | ||
{ | ||
"id": "Contribution from Health", | ||
"type": "float" | ||
}, | ||
{ | ||
"id": "SEND", | ||
"type": "alphanumeric" | ||
}, | ||
{ | ||
"id": "Primary SEND category", | ||
"type": "alphanumeric", | ||
"exclude": [ | ||
"PAN" | ||
] | ||
}, | ||
{ | ||
"id": "Primary SEND need", | ||
"type": "alphanumeric", | ||
"exclude": [ | ||
"PAN" | ||
] | ||
}, | ||
{ | ||
"id": "LA", | ||
"type": "string", | ||
"enrich": "la_name" | ||
}, | ||
{ | ||
"id": "Month", | ||
"type": "integer", | ||
"enrich": "month" | ||
}, | ||
{ | ||
"id": "Year", | ||
"type": "integer", | ||
"enrich": "year" | ||
} | ||
] | ||
} | ||
] | ||
} |
Oops, something went wrong.