Skip to content

Commit

Permalink
feat: update processing of AAR CS file/codes
Browse files Browse the repository at this point in the history
- previous `BNCH…` codes have been updated with `BAI…` codes
- update processing of AAR/CS file to accommodate the above, in line with
  recent changes for `input_schemas`
  • Loading branch information
PsypherPunk committed Jan 20, 2025
1 parent 101111c commit fbfbc07
Show file tree
Hide file tree
Showing 4 changed files with 278 additions and 70 deletions.
62 changes: 6 additions & 56 deletions data-pipeline/src/pipeline/input_schemas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
aar_academies_column_mappings,
aar_academies_index_col,
)
from .aar_central_services import ( # noqa
aar_central_services,
aar_central_services_column_eval,
aar_central_services_column_mappings,
aar_central_services_index_col,
)
from .census_pupils import ( # noqa
pupil_census,
pupil_census_column_mappings,
Expand Down Expand Up @@ -178,62 +184,6 @@
# KS4 input schema: `ks4` maps each expected column name to a dtype string.
# NOTE(review): `ks4_index_col` presumably names the key/index column for
# this input and `ks4` is presumably handed to a pandas reader as `dtype=` —
# confirm both at the call site.
ks4_index_col = "URN"
ks4 = {"URN": "Int64", "ATT8SCR": "float", "P8MEA": "float", "P8_BANDING": "string"}

# Name of the trust-identifier column of the AAR central services file.
aar_central_services_index_col = "Lead_UPIN"
# Flat (single-layout) schema for the AAR central services file: column name
# -> dtype string. The loader passes this mapping to `pd.read_csv` as `dtype=`
# and restricts the columns read to its keys, so anything not listed here is
# never loaded.
aar_central_services = {
"Lead_UPIN": "Int64",
"Company_Number": "string",
"Company_Name": "string",
"BNCH11110T (EFA Revenue Grants)": "float",
"BNCH11131 (DfE Family Revenue Grants)": "float",
"BNCH11141 (SEN)": "float",
"BNCH11142 (Other Revenue)": "float",
"BNCH11151 (Other Government Revenue Grants)": "float",
"BNCH11161 (Government source (non-grant))": "float",
"BNCH11162 (Academies)": "float",
"BNCH11163 (Non- Government)": "float",
"BNCH11123-BTI011-A (MAT Central services - Income)": "float",
"BNCH11201 (Income from facilities and services)": "float",
"BNCH11202 (Income from catering)": "float",
"BNCH11203 (Receipts from supply teacher insurance claims)": "float",
"BNCH11300T (Voluntary income)": "float",
"BNCH11204 (Other income - revenue)": "float",
"BNCH11205 (Other Income from facilities and services)": "float",
"BNCH11400T (Investment income)": "float",
"BNCH21706 (Administrative supplies - non educational)": "float",
"BNCH21106 (Catering staff)": "float",
"BNCH21701 (Catering supplies)": "float",
"BNCH21707 (Direct revenue financing (Revenue contributions to capital))": "float",
"BNCH21602 (ICT learning resources)": "float",
"BNCH21603 (Examination fees)": "float",
"BNCH21601 (Learning resources (not ICT equipment))": "float",
"BNCH21104 (Administrative and clerical staff)": "float",
"BNCH21703 (Auditor costs)": "float",
"BNCH21107 (Other staff)": "float",
"BNCH44001CS (Closing Balance (Restricted and Unrestricted Funds))": "float",
"BNCH21702 (Professional Services - non-curriculum)": "float",
"BNCH21301 (Maintenance of premises)": "float",
"BNCH21405 (Grounds maintenance)": "float",
"BNCH21201 (Indirect employee expenses)": "float",
"BNCH21801 (Interest charges for Loan and bank)": "float",
"BNCH21705 (Other insurance premiums)": "float",
"BNCH21802 (PFI Charges)": "float",
"BNCH21404 (Rent and rates)": "float",
"BNCH21501 (Special facilities)": "float",
"BNCH21202 (Staff development and training)": "float",
"BNCH21203 (Staff-related insurance)": "float",
"BNCH21204 (Supply teacher insurance)": "float",
"BNCH21401 (Cleaning and caretaking)": "float",
"BNCH21406 (Other occupation costs)": "float",
"BNCH21105 (Premises staff)": "float",
"BNCH21101 (Teaching staff)": "float",
"BNCH21102 (Supply teaching staff - extra note in guidance)": "float",
"BNCH21103 (Education support staff)": "float",
"BNCH21604 (Educational Consultancy)": "float",
"BNCH21606 (Agency supply teaching staff)": "float",
"BNCH21403 (Energy)": "float",
"BNCH21402 (Water and sewerage)": "float",
}

bfr_sofa_cols = {
"TrustUPIN": "Int64",
"Title": "string",
Expand Down
249 changes: 249 additions & 0 deletions data-pipeline/src/pipeline/input_schemas/aar_central_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
# Trust-identifier column of the AAR central services file. The loader drops
# every row that has no value in this column after reading the CSV.
aar_central_services_index_col = "Lead_UPIN"

# Identifier columns present in every year's layout, with their dtypes.
_CS_ID_DTYPES = {
    "Lead_UPIN": "Int64",
    "Company_Number": "string",
}

# `BNCH…` columns that open the "default" and 2023 layouts.
_CS_BNCH_LEADING = (
    "BNCH11110T (EFA Revenue Grants)",
    "BNCH11131 (DfE Family Revenue Grants)",
    "BNCH11141 (SEN)",
    "BNCH11142 (Other Revenue)",
    "BNCH11151 (Other Government Revenue Grants)",
    "BNCH11161 (Government source (non-grant))",
    "BNCH11162 (Academies)",
    "BNCH11163 (Non- Government)",
)

# The only column 2023 has that "default" does not; it sits between the
# leading and remaining `BNCH…` columns.
_CS_BNCH_2023_ONLY = ("BNCH11123-BTI011-A (MAT Central services - Income)",)

# `BNCH…` columns that close the "default" and 2023 layouts.
_CS_BNCH_REMAINING = (
    "BNCH11201 (Income from facilities and services)",
    "BNCH11202 (Income from catering)",
    "BNCH11203 (Receipts from supply teacher insurance claims)",
    "BNCH11300T (Voluntary income)",
    "BNCH11204 (Other income - revenue)",
    "BNCH11205 (Other Income from facilities and services)",
    "BNCH11400T (Investment income)",
    "BNCH21706 (Administrative supplies - non educational)",
    "BNCH21106 (Catering staff)",
    "BNCH21701 (Catering supplies)",
    "BNCH21707 (Direct revenue financing (Revenue contributions to capital))",
    "BNCH21602 (ICT learning resources)",
    "BNCH21603 (Examination fees)",
    "BNCH21601 (Learning resources (not ICT equipment))",
    "BNCH21104 (Administrative and clerical staff)",
    "BNCH21703 (Auditor costs)",
    "BNCH21107 (Other staff)",
    "BNCH44001CS (Closing Balance (Restricted and Unrestricted Funds))",
    "BNCH21702 (Professional Services - non-curriculum)",
    "BNCH21301 (Maintenance of premises)",
    "BNCH21405 (Grounds maintenance)",
    "BNCH21201 (Indirect employee expenses)",
    "BNCH21801 (Interest charges for Loan and bank)",
    "BNCH21705 (Other insurance premiums)",
    "BNCH21802 (PFI Charges)",
    "BNCH21404 (Rent and rates)",
    "BNCH21501 (Special facilities)",
    "BNCH21202 (Staff development and training)",
    "BNCH21203 (Staff-related insurance)",
    "BNCH21204 (Supply teacher insurance)",
    "BNCH21401 (Cleaning and caretaking)",
    "BNCH21406 (Other occupation costs)",
    "BNCH21105 (Premises staff)",
    "BNCH21101 (Teaching staff)",
    "BNCH21102 (Supply teaching staff - extra note in guidance)",
    "BNCH21103 (Education support staff)",
    "BNCH21604 (Educational Consultancy)",
    "BNCH21606 (Agency supply teaching staff)",
    "BNCH21403 (Energy)",
    "BNCH21402 (Water and sewerage)",
)

# Short source codes that make up the 2024 layout, in file order.
_CS_CODES_2024 = (
    "BTI050",
    "BTI061",
    "BTI030",
    "BTI040",
    "BTI060",
    "BTI070",
    "BTI080",
    "BTI090",
    "BTI100",
    "BTI110",
    "BTI120",
    "BTI130",
    "BTI140",
    "BTI150",
    "BTE010",
    "BTE020",
    "BTE030",
    "BTE040",
    "BTE050",
    "BTE060",
    "BTE070",
    "BTE080",
    "BTE090",
    "BTE110",
    "BTE100",
    "BTE120",
    "BTE130",
    "BTE140",
    "BTE150",
    "BTE160",
    "BTE170",
    "BTE180",
    "BTE190",
    "BTE200",
    "BTE210",
    "BTE220",
    "BTE230",
    "BTE240",
    "BTE250",
    "BTE300",
    "BTE260",
    "BTE270",
    "BTE280",
    "BTE290",
    "BTE320",
    "BTE310",
    "BTI170",
    "BTB030",
    "BAB030-T",
    "BTI101",
    "BTI011-A",
)

# Per-year read schemas for the AAR central services file: year -> {column
# name: dtype string}. The loader looks the year up and falls back to
# "default"; the chosen mapping supplies both the columns to read and their
# dtypes. Every non-identifier column is read as "float".
aar_central_services = {
    "default": {
        **_CS_ID_DTYPES,
        **dict.fromkeys(_CS_BNCH_LEADING + _CS_BNCH_REMAINING, "float"),
    },
    2023: {
        **_CS_ID_DTYPES,
        **dict.fromkeys(
            _CS_BNCH_LEADING + _CS_BNCH_2023_ONLY + _CS_BNCH_REMAINING,
            "float",
        ),
    },
    2024: {
        **_CS_ID_DTYPES,
        **dict.fromkeys(_CS_CODES_2024, "float"),
    },
}

# 2024 renames for the `BTI…` codes that open the file layout.
_CS_RENAMES_2024_BTI = {
    "BTI050": "BNCH11101 (Start-up grants)",
    "BTI061": "BNCHBAI061 (Coronavirus Govt Funding)",
    "BTI030": "BNCH11141 (SEN)",
    "BTI040": "BNCH11142 (Other Revenue)",
    "BTI060": "BNCH11151 (Other Government Revenue Grants)",
    "BTI070": "BNCH11161 (Government source (non-grant))",
    "BTI080": "BNCH11162 (Academies)",
    "BTI090": "BNCH11163 (Non- Government)",
    "BTI100": "BNCH11201 (Income from facilities and services)",
    "BTI110": "BNCH11202 (Income from catering)",
    "BTI120": "BNCH11203 (Receipts from supply teacher insurance claims)",
    "BTI130": "BNCH11204 (Other income - revenue)",
    "BTI140": "BNCH11301 (Donations and/or voluntary funds - revenue)",
    "BTI150": "BNCH11401 (Investment income)",
}

# 2024 renames for the `BTE…` codes.
_CS_RENAMES_2024_BTE = {
    "BTE010": "BNCH21101 (Teaching staff)",
    "BTE020": "BNCH21102 (Supply teaching staff - extra note in guidance)",
    "BTE030": "BNCH21103 (Education support staff)",
    "BTE040": "BNCH21104 (Administrative and clerical staff)",
    "BTE050": "BNCH21105 (Premises staff)",
    "BTE060": "BNCH21106 (Catering staff)",
    "BTE070": "BNCH21107 (Other staff)",
    "BTE080": "BNCH21201 (Indirect employee expenses)",
    "BTE090": "BNCH21202 (Staff development and training)",
    "BTE110": "BNCH21203 (Staff-related insurance)",
    "BTE100": "BNCH21204 (Supply teacher insurance)",
    "BTE120": "BNCH21301 (Maintenance of premises)",
    "BTE130": "BNCH21401 (Cleaning and caretaking)",
    "BTE140": "BNCH21402 (Water and sewerage)",
    "BTE150": "BNCH21403 (Energy)",
    "BTE160": "BNCH21404 (Rent and rates)",
    "BTE170": "BNCH21405 (Grounds maintenance)",
    "BTE180": "BNCH21406 (Other occupation costs)",
    "BTE190": "BNCH21501 (Special facilities)",
    "BTE200": "BNCH21601 (Learning resources (not ICT equipment))",
    "BTE210": "BNCH21602 (ICT learning resources)",
    "BTE220": "BNCH21603 (Examination fees)",
    "BTE230": "BNCH21604 (Educational Consultancy)",
    "BTE240": "BNCH21606 (Agency supply teaching staff)",
    "BTE250": "BNCH21701 (Catering supplies)",
    "BTE300": "BNCH21702 (Professional Services - non-curriculum)",
    "BTE260": "BNCH21703 (Auditor costs)",
    "BTE270": "BNCH21705 (Other insurance premiums)",
    "BTE280": "BNCH21706 (Administrative supplies - non educational)",
    "BTE290": "BNCH21707 (Direct revenue financing (Revenue contributions to capital))",
    "BTE320": "BNCH21801 (Interest charges for Loan and bank)",
    "BTE310": "BNCH21802 (PFI Charges)",
}

# 2024 renames for the codes that follow the `BTE…` run in the file layout.
_CS_RENAMES_2024_TRAILING = {
    "BTI170": "BNCH43001 (Contributions from Academies to Trust)",
    "BTB030": "BNCH44001CS (Closing Balance (Restricted and Unrestricted Funds))",
    "BAB030-T": "BNCH44001Bench (Closing Balance (Restricted and Unrestricted Funds))",
    "BTI101": "BNCH11205 (Other Income from facilities and services)",
    "BTI011-A": "BNCH11123-BTI011-A (MAT Central services - Income)",
}

# Per-year column renames applied straight after the CSV is read: year ->
# {source column: `BNCH…` column name}. Years with no entry of their own use
# "default", which renames nothing.
aar_central_services_column_mappings = {
    "default": {},
    2024: {
        **_CS_RENAMES_2024_BTI,
        **_CS_RENAMES_2024_BTE,
        **_CS_RENAMES_2024_TRAILING,
    },
}

# Per-year derived columns: year -> {target column: expression}. After the
# CSV has been read and renamed, the loader assigns each target column the
# result of `DataFrame.eval(expression)`; years with no entry of their own
# use "default". Expressions refer to the *renamed* columns, back-tick quoted
# because the names contain spaces and parentheses.
aar_central_services_column_eval = {
    "default": {
        # The "default" layout does not read the MAT central-services income
        # column, so it is created here as a constant zero. This mirrors the
        # back-fill the loader performed before it became schema-driven.
        "BNCH11123-BTI011-A (MAT Central services - Income)": "0.0",
        # NOTE(review): this is the academies-file column name and looks like
        # a copy/paste slip for the entry above. It is kept so that existing
        # consumers of the frame see no column disappear — confirm it is
        # unused and then remove it.
        "BNCH11123-BAI011-A (Academies - Income)": "0.0",
    },
    # 2023 reads every column it needs directly; nothing is derived.
    2023: {},
    2024: {
        "BNCH11300T (Voluntary income)": "`BNCH11301 (Donations and/or voluntary funds - revenue)`",
        # "BNCH11122 (Other)": "0.0",
        "BNCH11110T (EFA Revenue Grants)": "`BNCH11101 (Start-up grants)` + `BNCHBAI061 (Coronavirus Govt Funding)`",
        "BNCH11131 (DfE Family Revenue Grants)": "0.0",
        # "BNCH11140T (Local Authority Revenue Grants)": "[BTI030] + [BTI040]",
        # "BNCH11100T (Grant funding)": "[BTI050] + 0 + 0 + [BTI030] + [BTI040] + [BTI060] + [BTI061]",
        # "BNCH11160T (Other Revenue Income)": "[BTI070] + [BTI080] + [BTI090]",
        # "BNCH11200T (Self-generated income)": "[BTI100] + [BTI110] + [BTI120] + [BTI130] + [BTI101]",
        "BNCH11400T (Investment income)": "`BNCH11401 (Investment income)`",
        # "BNCH11000T (Revenue Income)": "[BTI050] + [BNCH11122] + [BNCH11131] + [BTI030] + [BTI040] + [BTI060] + [BTI070] + [BTI080] + [BTI090] + [BTI100] + [BTI110] + [BTI120] + [BTI130] + [BTI140] + [BTI150] + [BTI101] + [BTI061]",
        # "BNCH21100T (Staff costs)": "[BTE010] + [BTE020] + [BTE030] + [BTE040] + [BTE050] + [BTE060] + [BTE070]",
        # "BNCH21200T (Staff support costs)": "[BTE080] + [BTE090] + [BTE110] + [BTE100]",
        # Identity expression: re-assigns the column to itself, leaving its
        # values unchanged.
        "BNCH21401 (Cleaning and caretaking)": "`BNCH21401 (Cleaning and caretaking)`",
        # "BNCH21400T (Other occupancy costs)": "[BTE130] + [BTE140] + [BTE150] + [BTE160] + [BTE170] + [BTE180]",
        # "BNCH21600T (Educational supplies and services)": "[BTE200] + [BTE210] + [BTE220] + [BTE230] + [BTE240]",
        # "BNCH21700T (Other supplies and services)": "[BTE250] + [BTE300] + [BTE260] + [BTE270] + [BTE280] + [BTE290]",
        # "BNCH21800T (Funding costs)": "[BTE320] + [BTE310]",
        # "BNCH20000T (Total Costs)": "[BTE010] + [BTE020] + [BTE030] + [BTE040] + [BTE050] + [BTE060] + [BTE070] + [BTE080] + [BTE090] + [BTE110] + [BTE100] + [BTE120] + [BTE130] + [BTE140] + [BTE150] + [BTE160] + [BTE170] + [BTE180] + [BTE190] + [BTE200] + [BTE210] + [BTE220] + [BTE230] + [BTE240] + [BTE250] + [BTE300] + [BTE260] + [BTE270] + [BTE280] + [BTE290] + [BTE320] + [BTE310]) AS [BNCH20000T]",
        # "BNCH44001T": "BTB030 + BAB030-T",
    },
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,30 @@
import pipeline.mappings as mappings


def prepare_central_services_data(cs_path, current_year: int):
central_services_financial = pd.read_csv(
cs_path,
encoding="utf-8",
usecols=lambda x: x in input_schemas.aar_central_services.keys(),
dtype=input_schemas.aar_central_services,
def prepare_central_services_data(cs_path, year: int):
central_services_financial = (
pd.read_csv(
cs_path,
encoding="utf-8",
usecols=input_schemas.aar_central_services.get(
year, input_schemas.aar_central_services["default"]
).keys(),
dtype=input_schemas.aar_central_services.get(
year, input_schemas.aar_central_services["default"]
),
)
.rename(
columns=input_schemas.aar_central_services_column_mappings.get(
year, input_schemas.aar_central_services_column_mappings["default"]
),
)
.dropna(subset=[input_schemas.aar_central_services_index_col])
)

if (
"BNCH11123-BTI011-A (MAT Central services - Income)"
not in central_services_financial.columns
):
central_services_financial[
"BNCH11123-BTI011-A (MAT Central services - Income)"
] = 0.0
for column, eval_ in input_schemas.aar_central_services_column_eval.get(
year, input_schemas.aar_central_services_column_eval["default"]
).items():
central_services_financial[column] = central_services_financial.eval(eval_)

central_services_financial["Income_Direct revenue finance"] = (
central_services_financial[
Expand Down
2 changes: 1 addition & 1 deletion data-pipeline/tests/unit/pre_processing/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ def prepared_central_services_data(
aar_central_services_data: pd.DataFrame,
) -> pd.DataFrame:
return prepare_central_services_data(
StringIO(aar_central_services_data.to_csv()), 2022
StringIO(aar_central_services_data.to_csv()), 2023
)


Expand Down

0 comments on commit fbfbc07

Please sign in to comment.