From fbfbc07236a8f79a6b308036af66bfc733c1cf54 Mon Sep 17 00:00:00 2001 From: "Roger G. Coram" Date: Mon, 20 Jan 2025 16:01:07 +0000 Subject: [PATCH] feat: update processing of AAR CS file/codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - previous `BNCH…` codes have been updated with `BAI…` codes - update processing of AAR/CS file to accommodate the above, in line with recent changes for `input_schemas` --- .../src/pipeline/input_schemas/__init__.py | 62 +---- .../input_schemas/aar_central_services.py | 249 ++++++++++++++++++ .../academies/central_services.py | 35 ++- .../tests/unit/pre_processing/conftest.py | 2 +- 4 files changed, 278 insertions(+), 70 deletions(-) create mode 100644 data-pipeline/src/pipeline/input_schemas/aar_central_services.py diff --git a/data-pipeline/src/pipeline/input_schemas/__init__.py b/data-pipeline/src/pipeline/input_schemas/__init__.py index 119eaf854..a8d0571b2 100644 --- a/data-pipeline/src/pipeline/input_schemas/__init__.py +++ b/data-pipeline/src/pipeline/input_schemas/__init__.py @@ -4,6 +4,12 @@ aar_academies_column_mappings, aar_academies_index_col, ) +from .aar_central_services import ( # noqa + aar_central_services, + aar_central_services_column_eval, + aar_central_services_column_mappings, + aar_central_services_index_col, +) from .census_pupils import ( # noqa pupil_census, pupil_census_column_mappings, @@ -178,62 +184,6 @@ ks4_index_col = "URN" ks4 = {"URN": "Int64", "ATT8SCR": "float", "P8MEA": "float", "P8_BANDING": "string"} -aar_central_services_index_col = "Lead_UPIN" -aar_central_services = { - "Lead_UPIN": "Int64", - "Company_Number": "string", - "Company_Name": "string", - "BNCH11110T (EFA Revenue Grants)": "float", - "BNCH11131 (DfE Family Revenue Grants)": "float", - "BNCH11141 (SEN)": "float", - "BNCH11142 (Other Revenue)": "float", - "BNCH11151 (Other Government Revenue Grants)": "float", - "BNCH11161 (Government source (non-grant))": "float", - "BNCH11162 
(Academies)": "float", - "BNCH11163 (Non- Government)": "float", - "BNCH11123-BTI011-A (MAT Central services - Income)": "float", - "BNCH11201 (Income from facilities and services)": "float", - "BNCH11202 (Income from catering)": "float", - "BNCH11203 (Receipts from supply teacher insurance claims)": "float", - "BNCH11300T (Voluntary income)": "float", - "BNCH11204 (Other income - revenue)": "float", - "BNCH11205 (Other Income from facilities and services)": "float", - "BNCH11400T (Investment income)": "float", - "BNCH21706 (Administrative supplies - non educational)": "float", - "BNCH21106 (Catering staff)": "float", - "BNCH21701 (Catering supplies)": "float", - "BNCH21707 (Direct revenue financing (Revenue contributions to capital))": "float", - "BNCH21602 (ICT learning resources)": "float", - "BNCH21603 (Examination fees)": "float", - "BNCH21601 (Learning resources (not ICT equipment))": "float", - "BNCH21104 (Administrative and clerical staff)": "float", - "BNCH21703 (Auditor costs)": "float", - "BNCH21107 (Other staff)": "float", - "BNCH44001CS (Closing Balance (Restricted and Unrestricted Funds))": "float", - "BNCH21702 (Professional Services - non-curriculum)": "float", - "BNCH21301 (Maintenance of premises)": "float", - "BNCH21405 (Grounds maintenance)": "float", - "BNCH21201 (Indirect employee expenses)": "float", - "BNCH21801 (Interest charges for Loan and bank)": "float", - "BNCH21705 (Other insurance premiums)": "float", - "BNCH21802 (PFI Charges)": "float", - "BNCH21404 (Rent and rates)": "float", - "BNCH21501 (Special facilities)": "float", - "BNCH21202 (Staff development and training)": "float", - "BNCH21203 (Staff-related insurance)": "float", - "BNCH21204 (Supply teacher insurance)": "float", - "BNCH21401 (Cleaning and caretaking)": "float", - "BNCH21406 (Other occupation costs)": "float", - "BNCH21105 (Premises staff)": "float", - "BNCH21101 (Teaching staff)": "float", - "BNCH21102 (Supply teaching staff - extra note in guidance)": "float", - 
"BNCH21103 (Education support staff)": "float", - "BNCH21604 (Educational Consultancy)": "float", - "BNCH21606 (Agency supply teaching staff)": "float", - "BNCH21403 (Energy)": "float", - "BNCH21402 (Water and sewerage)": "float", -} - bfr_sofa_cols = { "TrustUPIN": "Int64", "Title": "string", diff --git a/data-pipeline/src/pipeline/input_schemas/aar_central_services.py b/data-pipeline/src/pipeline/input_schemas/aar_central_services.py new file mode 100644 index 000000000..78881249e --- /dev/null +++ b/data-pipeline/src/pipeline/input_schemas/aar_central_services.py @@ -0,0 +1,249 @@ +aar_central_services_index_col = "Lead_UPIN" + +aar_central_services = { + "default": { + "Lead_UPIN": "Int64", + "Company_Number": "string", + "BNCH11110T (EFA Revenue Grants)": "float", + "BNCH11131 (DfE Family Revenue Grants)": "float", + "BNCH11141 (SEN)": "float", + "BNCH11142 (Other Revenue)": "float", + "BNCH11151 (Other Government Revenue Grants)": "float", + "BNCH11161 (Government source (non-grant))": "float", + "BNCH11162 (Academies)": "float", + "BNCH11163 (Non- Government)": "float", + "BNCH11201 (Income from facilities and services)": "float", + "BNCH11202 (Income from catering)": "float", + "BNCH11203 (Receipts from supply teacher insurance claims)": "float", + "BNCH11300T (Voluntary income)": "float", + "BNCH11204 (Other income - revenue)": "float", + "BNCH11205 (Other Income from facilities and services)": "float", + "BNCH11400T (Investment income)": "float", + "BNCH21706 (Administrative supplies - non educational)": "float", + "BNCH21106 (Catering staff)": "float", + "BNCH21701 (Catering supplies)": "float", + "BNCH21707 (Direct revenue financing (Revenue contributions to capital))": "float", + "BNCH21602 (ICT learning resources)": "float", + "BNCH21603 (Examination fees)": "float", + "BNCH21601 (Learning resources (not ICT equipment))": "float", + "BNCH21104 (Administrative and clerical staff)": "float", + "BNCH21703 (Auditor costs)": "float", + "BNCH21107 (Other 
staff)": "float", + "BNCH44001CS (Closing Balance (Restricted and Unrestricted Funds))": "float", + "BNCH21702 (Professional Services - non-curriculum)": "float", + "BNCH21301 (Maintenance of premises)": "float", + "BNCH21405 (Grounds maintenance)": "float", + "BNCH21201 (Indirect employee expenses)": "float", + "BNCH21801 (Interest charges for Loan and bank)": "float", + "BNCH21705 (Other insurance premiums)": "float", + "BNCH21802 (PFI Charges)": "float", + "BNCH21404 (Rent and rates)": "float", + "BNCH21501 (Special facilities)": "float", + "BNCH21202 (Staff development and training)": "float", + "BNCH21203 (Staff-related insurance)": "float", + "BNCH21204 (Supply teacher insurance)": "float", + "BNCH21401 (Cleaning and caretaking)": "float", + "BNCH21406 (Other occupation costs)": "float", + "BNCH21105 (Premises staff)": "float", + "BNCH21101 (Teaching staff)": "float", + "BNCH21102 (Supply teaching staff - extra note in guidance)": "float", + "BNCH21103 (Education support staff)": "float", + "BNCH21604 (Educational Consultancy)": "float", + "BNCH21606 (Agency supply teaching staff)": "float", + "BNCH21403 (Energy)": "float", + "BNCH21402 (Water and sewerage)": "float", + }, + 2023: { + "Lead_UPIN": "Int64", + "Company_Number": "string", + "BNCH11110T (EFA Revenue Grants)": "float", + "BNCH11131 (DfE Family Revenue Grants)": "float", + "BNCH11141 (SEN)": "float", + "BNCH11142 (Other Revenue)": "float", + "BNCH11151 (Other Government Revenue Grants)": "float", + "BNCH11161 (Government source (non-grant))": "float", + "BNCH11162 (Academies)": "float", + "BNCH11163 (Non- Government)": "float", + "BNCH11123-BTI011-A (MAT Central services - Income)": "float", + "BNCH11201 (Income from facilities and services)": "float", + "BNCH11202 (Income from catering)": "float", + "BNCH11203 (Receipts from supply teacher insurance claims)": "float", + "BNCH11300T (Voluntary income)": "float", + "BNCH11204 (Other income - revenue)": "float", + "BNCH11205 (Other Income from 
facilities and services)": "float", + "BNCH11400T (Investment income)": "float", + "BNCH21706 (Administrative supplies - non educational)": "float", + "BNCH21106 (Catering staff)": "float", + "BNCH21701 (Catering supplies)": "float", + "BNCH21707 (Direct revenue financing (Revenue contributions to capital))": "float", + "BNCH21602 (ICT learning resources)": "float", + "BNCH21603 (Examination fees)": "float", + "BNCH21601 (Learning resources (not ICT equipment))": "float", + "BNCH21104 (Administrative and clerical staff)": "float", + "BNCH21703 (Auditor costs)": "float", + "BNCH21107 (Other staff)": "float", + "BNCH44001CS (Closing Balance (Restricted and Unrestricted Funds))": "float", + "BNCH21702 (Professional Services - non-curriculum)": "float", + "BNCH21301 (Maintenance of premises)": "float", + "BNCH21405 (Grounds maintenance)": "float", + "BNCH21201 (Indirect employee expenses)": "float", + "BNCH21801 (Interest charges for Loan and bank)": "float", + "BNCH21705 (Other insurance premiums)": "float", + "BNCH21802 (PFI Charges)": "float", + "BNCH21404 (Rent and rates)": "float", + "BNCH21501 (Special facilities)": "float", + "BNCH21202 (Staff development and training)": "float", + "BNCH21203 (Staff-related insurance)": "float", + "BNCH21204 (Supply teacher insurance)": "float", + "BNCH21401 (Cleaning and caretaking)": "float", + "BNCH21406 (Other occupation costs)": "float", + "BNCH21105 (Premises staff)": "float", + "BNCH21101 (Teaching staff)": "float", + "BNCH21102 (Supply teaching staff - extra note in guidance)": "float", + "BNCH21103 (Education support staff)": "float", + "BNCH21604 (Educational Consultancy)": "float", + "BNCH21606 (Agency supply teaching staff)": "float", + "BNCH21403 (Energy)": "float", + "BNCH21402 (Water and sewerage)": "float", + }, + 2024: { + "Lead_UPIN": "Int64", + "Company_Number": "string", + "BTI050": "float", + "BTI061": "float", + "BTI030": "float", + "BTI040": "float", + "BTI060": "float", + "BTI070": "float", + "BTI080": 
"float", + "BTI090": "float", + "BTI100": "float", + "BTI110": "float", + "BTI120": "float", + "BTI130": "float", + "BTI140": "float", + "BTI150": "float", + "BTE010": "float", + "BTE020": "float", + "BTE030": "float", + "BTE040": "float", + "BTE050": "float", + "BTE060": "float", + "BTE070": "float", + "BTE080": "float", + "BTE090": "float", + "BTE110": "float", + "BTE100": "float", + "BTE120": "float", + "BTE130": "float", + "BTE140": "float", + "BTE150": "float", + "BTE160": "float", + "BTE170": "float", + "BTE180": "float", + "BTE190": "float", + "BTE200": "float", + "BTE210": "float", + "BTE220": "float", + "BTE230": "float", + "BTE240": "float", + "BTE250": "float", + "BTE300": "float", + "BTE260": "float", + "BTE270": "float", + "BTE280": "float", + "BTE290": "float", + "BTE320": "float", + "BTE310": "float", + "BTI170": "float", + "BTB030": "float", + "BAB030-T": "float", + "BTI101": "float", + "BTI011-A": "float", + }, +} + +aar_central_services_column_mappings = { + "default": {}, + 2024: { + "BTI050": "BNCH11101 (Start-up grants)", + "BTI061": "BNCHBAI061 (Coronavirus Govt Funding)", + "BTI030": "BNCH11141 (SEN)", + "BTI040": "BNCH11142 (Other Revenue)", + "BTI060": "BNCH11151 (Other Government Revenue Grants)", + "BTI070": "BNCH11161 (Government source (non-grant))", + "BTI080": "BNCH11162 (Academies)", + "BTI090": "BNCH11163 (Non- Government)", + "BTI100": "BNCH11201 (Income from facilities and services)", + "BTI110": "BNCH11202 (Income from catering)", + "BTI120": "BNCH11203 (Receipts from supply teacher insurance claims)", + "BTI130": "BNCH11204 (Other income - revenue)", + "BTI140": "BNCH11301 (Donations and/or voluntary funds - revenue)", + "BTI150": "BNCH11401 (Investment income)", + "BTE010": "BNCH21101 (Teaching staff)", + "BTE020": "BNCH21102 (Supply teaching staff - extra note in guidance)", + "BTE030": "BNCH21103 (Education support staff)", + "BTE040": "BNCH21104 (Administrative and clerical staff)", + "BTE050": "BNCH21105 (Premises staff)", 
+ "BTE060": "BNCH21106 (Catering staff)", + "BTE070": "BNCH21107 (Other staff)", + "BTE080": "BNCH21201 (Indirect employee expenses)", + "BTE090": "BNCH21202 (Staff development and training)", + "BTE110": "BNCH21203 (Staff-related insurance)", + "BTE100": "BNCH21204 (Supply teacher insurance)", + "BTE120": "BNCH21301 (Maintenance of premises)", + "BTE130": "BNCH21401 (Cleaning and caretaking)", + "BTE140": "BNCH21402 (Water and sewerage)", + "BTE150": "BNCH21403 (Energy)", + "BTE160": "BNCH21404 (Rent and rates)", + "BTE170": "BNCH21405 (Grounds maintenance)", + "BTE180": "BNCH21406 (Other occupation costs)", + "BTE190": "BNCH21501 (Special facilities)", + "BTE200": "BNCH21601 (Learning resources (not ICT equipment))", + "BTE210": "BNCH21602 (ICT learning resources)", + "BTE220": "BNCH21603 (Examination fees)", + "BTE230": "BNCH21604 (Educational Consultancy)", + "BTE240": "BNCH21606 (Agency supply teaching staff)", + "BTE250": "BNCH21701 (Catering supplies)", + "BTE300": "BNCH21702 (Professional Services - non-curriculum)", + "BTE260": "BNCH21703 (Auditor costs)", + "BTE270": "BNCH21705 (Other insurance premiums)", + "BTE280": "BNCH21706 (Administrative supplies - non educational)", + "BTE290": "BNCH21707 (Direct revenue financing (Revenue contributions to capital))", + "BTE320": "BNCH21801 (Interest charges for Loan and bank)", + "BTE310": "BNCH21802 (PFI Charges)", + "BTI170": "BNCH43001 (Contributions from Academies to Trust)", + "BTB030": "BNCH44001CS (Closing Balance (Restricted and Unrestricted Funds))", + "BAB030-T": "BNCH44001Bench (Closing Balance (Restricted and Unrestricted Funds))", + "BTI101": "BNCH11205 (Other Income from facilities and services)", + "BTI011-A": "BNCH11123-BTI011-A (MAT Central services - Income)", + }, +} + +aar_central_services_column_eval = { + "default": { + "BNCH11123-BTI011-A (MAT Central services - Income)": "0.0", + }, + 2023: {}, + 2024: { + "BNCH11300T (Voluntary income)": "`BNCH11301 (Donations and/or voluntary funds - revenue)`", +
# "BNCH11122 (Other)": "0.0", + "BNCH11110T (EFA Revenue Grants)": "`BNCH11101 (Start-up grants)` + `BNCHBAI061 (Coronavirus Govt Funding)`", + "BNCH11131 (DfE Family Revenue Grants)": "0.0", + # "BNCH11140T (Local Authority Revenue Grants)": "[BTI030] + [BTI040]", + # "BNCH11100T (Grant funding)": "[BTI050] + 0 + 0 + [BTI030] + [BTI040] + [BTI060] + [BTI061]", + # "BNCH11160T (Other Revenue Income)": "[BTI070] + [BTI080] + [BTI090]", + # "BNCH11200T (Self-generated income)": "[BTI100] + [BTI110] + [BTI120] + [BTI130] + [BTI101]", + "BNCH11400T (Investment income)": "`BNCH11401 (Investment income)`", + # "BNCH11000T (Revenue Income)": "[BTI050] + [BNCH11122] + [BNCH11131] + [BTI030] + [BTI040] + [BTI060] + [BTI070] + [BTI080] + [BTI090] + [BTI100] + [BTI110] + [BTI120] + [BTI130] + [BTI140] + [BTI150] + [BTI101] + [BTI061]", + # "BNCH21100T (Staff costs)": "[BTE010] + [BTE020] + [BTE030] + [BTE040] + [BTE050] + [BTE060] + [BTE070]", + # "BNCH21200T (Staff support costs)": "[BTE080] + [BTE090] + [BTE110] + [BTE100]", + "BNCH21401 (Cleaning and caretaking)": "`BNCH21401 (Cleaning and caretaking)`", + # "BNCH21400T (Other occupancy costs)": "[BTE130] + [BTE140] + [BTE150] + [BTE160] + [BTE170] + [BTE180]", + # "BNCH21600T (Educational supplies and services)": "[BTE200] + [BTE210] + [BTE220] + [BTE230] + [BTE240]", + # "BNCH21700T (Other supplies and services)": "[BTE250] + [BTE300] + [BTE260] + [BTE270] + [BTE280] + [BTE290]", + # "BNCH21800T (Funding costs)": "[BTE320] + [BTE310]", + # "BNCH20000T (Total Costs)": "[BTE010] + [BTE020] + [BTE030] + [BTE040] + [BTE050] + [BTE060] + [BTE070] + [BTE080] + [BTE090] + [BTE110] + [BTE100] + [BTE120] + [BTE130] + [BTE140] + [BTE150] + [BTE160] + [BTE170] + [BTE180] + [BTE190] + [BTE200] + [BTE210] + [BTE220] + [BTE230] + [BTE240] + [BTE250] + [BTE300] + [BTE260] + [BTE270] + [BTE280] + [BTE290] + [BTE320] + [BTE310]", + # "BNCH44001T": "BTB030 + BAB030-T", + }, +} diff --git
a/data-pipeline/src/pipeline/pre_processing/academies/central_services.py b/data-pipeline/src/pipeline/pre_processing/academies/central_services.py index 24e585178..a4d4605e6 100644 --- a/data-pipeline/src/pipeline/pre_processing/academies/central_services.py +++ b/data-pipeline/src/pipeline/pre_processing/academies/central_services.py @@ -5,21 +5,30 @@ import pipeline.mappings as mappings -def prepare_central_services_data(cs_path, current_year: int): - central_services_financial = pd.read_csv( - cs_path, - encoding="utf-8", - usecols=lambda x: x in input_schemas.aar_central_services.keys(), - dtype=input_schemas.aar_central_services, +def prepare_central_services_data(cs_path, year: int): + central_services_financial = ( + pd.read_csv( + cs_path, + encoding="utf-8", + usecols=input_schemas.aar_central_services.get( + year, input_schemas.aar_central_services["default"] + ).keys(), + dtype=input_schemas.aar_central_services.get( + year, input_schemas.aar_central_services["default"] + ), + ) + .rename( + columns=input_schemas.aar_central_services_column_mappings.get( + year, input_schemas.aar_central_services_column_mappings["default"] + ), + ) + .dropna(subset=[input_schemas.aar_central_services_index_col]) ) - if ( - "BNCH11123-BTI011-A (MAT Central services - Income)" - not in central_services_financial.columns - ): - central_services_financial[ - "BNCH11123-BTI011-A (MAT Central services - Income)" - ] = 0.0 + for column, eval_ in input_schemas.aar_central_services_column_eval.get( + year, input_schemas.aar_central_services_column_eval["default"] + ).items(): + central_services_financial[column] = central_services_financial.eval(eval_) central_services_financial["Income_Direct revenue finance"] = ( central_services_financial[ diff --git a/data-pipeline/tests/unit/pre_processing/conftest.py b/data-pipeline/tests/unit/pre_processing/conftest.py index fee0ca86c..9d55ddf6d 100644 --- a/data-pipeline/tests/unit/pre_processing/conftest.py +++ 
b/data-pipeline/tests/unit/pre_processing/conftest.py @@ -342,7 +342,7 @@ def prepared_central_services_data( aar_central_services_data: pd.DataFrame, ) -> pd.DataFrame: return prepare_central_services_data( - StringIO(aar_central_services_data.to_csv()), 2022 + StringIO(aar_central_services_data.to_csv()), 2023 )