diff --git a/project/constants/csv_headings.py b/project/constants/csv_headings.py index 4bf5357f..43d37a93 100644 --- a/project/constants/csv_headings.py +++ b/project/constants/csv_headings.py @@ -1,5 +1,5 @@ CSV_HEADINGS = ( - # 'Patient') + # patient { "heading": "NHS Number", "model_field": "nhs_number", @@ -59,17 +59,17 @@ }, # Visit { - "heading": "'Visit')/Appointment Date", + "heading": "Visit/Appointment Date", "model_field": "visit_date", "model": "Visit", }, { - "heading": "'Patient') Height (cm)", + "heading": "Patient Height (cm)", "model_field": "height", "model": "Visit", }, { - "heading": "'Patient') Weight (kg)", + "heading": "Patient Weight (kg)", "model_field": "weight", "model": "Visit", }, @@ -100,7 +100,7 @@ "model": "Visit", }, { - "heading": "At the time of HbA1c measurement, in addition to standard blood glucose monitoring (SBGM), was the 'Patient') using any other method of glucose monitoring?", + "heading": "At the time of HbA1c measurement, in addition to standard blood glucose monitoring (SBGM), was the patient using any other method of glucose monitoring?", "model_field": "glucose_monitoring", "model": "Visit", }, @@ -165,7 +165,7 @@ "model": "Visit", }, { - "heading": "At time of, or following measurement of thyroid function, was the 'Patient') prescribed any thyroid treatment?", + "heading": "At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?", "model_field": "thyroid_treatment_status", "model": "Visit", }, @@ -175,7 +175,7 @@ "model": "Visit", }, { - "heading": "Has the 'Patient') been recommended a Gluten-free diet?", + "heading": "Has the patient been recommended a Gluten-free diet?", "model_field": "gluten_free_diet", "model": "Visit", }, @@ -185,17 +185,17 @@ "model": "Visit", }, { - "heading": "Was the 'Patient') assessed as requiring additional psychological/CAMHS support outside of MDT clinics?", + "heading": "Was the patient assessed as requiring additional 
psychological/CAMHS support outside of MDT clinics?", "model_field": "psychological_additional_support_status", "model": "Visit", }, { - "heading": "Does the 'Patient') smoke?", + "heading": "Does the patient smoke?", "model_field": "smoking_status", "model": "Visit", }, { - "heading": "Date of offer of referral to smoking cessation service (if 'Patient') is a current smoker)", + "heading": "Date of offer of referral to smoking cessation service (if patient is a current smoker)", "model_field": "smoking_cessation_referral_date", "model": "Visit", }, @@ -205,7 +205,7 @@ "model": "Visit", }, { - "heading": "Was the 'Patient') offered an additional appointment with a paediatric dietitian?", + "heading": "Was the patient offered an additional appointment with a paediatric dietitian?", "model_field": "dietician_additional_appointment_offered", "model": "Visit", }, @@ -215,7 +215,7 @@ "model": "Visit", }, { - "heading": "Was the 'Patient') using (or trained to use) blood ketone testing equipment at time of 'Visit')?", + "heading": "Was the patient using (or trained to use) blood ketone testing equipment at time of visit?", "model_field": "ketone_meter_training", "model": "Visit", }, @@ -245,7 +245,7 @@ "model": "Visit", }, { - "heading": "Only complete if DKA selected in previous question: During this DKA admission did the 'Patient') receive any of the following therapies?", + "heading": "Only complete if DKA selected in previous question: During this DKA admission did the patient receive any of the following therapies?", "model_field": "dka_additional_therapies", "model": "Visit", }, diff --git a/project/npda/general_functions/csv_upload.py b/project/npda/general_functions/csv_upload.py index 0fef49c5..82697d5d 100644 --- a/project/npda/general_functions/csv_upload.py +++ b/project/npda/general_functions/csv_upload.py @@ -5,6 +5,7 @@ import asyncio import collections import re +from pprint import pprint # django imports from django.apps import apps @@ -125,89 +126,15 @@ 
async def csv_upload(user, dataframe, csv_file, pdu_pz_code): Returns the empty dict if successful, otherwise ValidationErrors indexed by the row they occurred at Also return the dataframe for later summary purposes """ + + # Get the models Patient = apps.get_model("npda", "Patient") Transfer = apps.get_model("npda", "Transfer") Visit = apps.get_model("npda", "Visit") Submission = apps.get_model("npda", "Submission") PaediatricDiabetesUnit = apps.get_model("npda", "PaediatricDiabetesUnit") - # get the PDU object - # TODO #249 MRB: handle case where PDU does not exist - pdu = await PaediatricDiabetesUnit.objects.aget(pz_code=pdu_pz_code) - - # Set previous submission to inactive - if await Submission.objects.filter( - paediatric_diabetes_unit__pz_code=pdu.pz_code, - audit_year=date.today().year, - submission_active=True, - ).aexists(): - original_submission = await Submission.objects.filter( - submission_active=True, - paediatric_diabetes_unit__pz_code=pdu.pz_code, - audit_year=date.today().year, - ).aget() # there can be only one of these - store it in a variable in case we need to revert - else: - original_submission = None - - # Create new submission for the audit year - # It is not possble to create submissions in years other than the current year - try: - new_submission = await Submission.objects.acreate( - paediatric_diabetes_unit=pdu, - audit_year=date.today().year, - submission_date=timezone.now(), - submission_by=user, # user is the user who is logged in. 
Passed in as a parameter - submission_active=True, - ) - - if csv_file: - # save the csv file with a custom name - new_filename = ( - f"{pdu.pz_code}_{timezone.now().strftime('%Y%m%d_%H%M%S')}.csv" - ) - - # save=False so it doesn't try to save the parent, which would cause an error in an async context - # we save immediately after this anyway - new_submission.csv_file.save(new_filename, csv_file, save=False) - - await new_submission.asave() - - except Exception as e: - logger.error(f"Error creating new submission: {e}") - # the new submission was not created - no action required as the previous submission is still active - raise ValidationError( - { - "csv_upload": "Error creating new submission. The old submission has been restored." - } - ) - - # now can delete all patients and visits from the previous active submission - if original_submission: - try: - original_submission_patient_count = await Patient.objects.filter( - submissions=original_submission - ).acount() - print( - f"Deleting patients from previous submission: {original_submission_patient_count}" - ) - await Patient.objects.filter(submissions=original_submission).adelete() - except Exception as e: - raise ValidationError( - {"csv_upload": "Error deleting patients from previous submission"} - ) - - # now can delete the any previous active submission's csv file (if it exists) - # and remove the path from the field by setting it to None - # the rest of the submission will be retained - if original_submission: - original_submission.submission_active = False - try: - await original_submission.asave() # this action will delete the csv file also as per the save method in the model - except Exception as e: - raise ValidationError( - {"csv_upload": "Error deactivating previous submission"} - ) - + # Helper functions def csv_value_to_model_value(model_field, value): if pd.isnull(value): return None @@ -234,7 +161,7 @@ def row_to_dict(row, model): ret = {} for entry in CSV_HEADINGS: - if "model" in entry and 
entry["model"] == model: + if "model" in entry and apps.get_model("npda", entry["model"]) == model: model_field_name = entry["model_field"] model_field_definition = model._meta.get_field(model_field_name) @@ -286,34 +213,12 @@ async def validate_rows(rows, async_client): axis=1, ) - return (patient_form, transfer_fields, patient_row_index, visits) - - # async def validate_rows(rows, async_client): - # first_row = rows.iloc[0] - # patient_row_index = first_row["row_index"] - - # (transfer_fields, transfer_field_errors) = validate_transfer(first_row) - # (patient_form, patient_field_errors) = await validate_patient_using_form( - # first_row, async_client - # ) - - # visits = [] - - # for _, row in rows.iterrows(): - # (visit_form, visit_field_errors) = validate_visit_using_form( - # patient_form.instance, row - # ) - # visits.append((visit_form, visit_field_errors, row["row_index"])) - - # first_row_field_errors = transfer_field_errors | patient_field_errors - - # return ( - # patient_form, - # transfer_fields, - # patient_row_index, - # first_row_field_errors, - # visits, - # ) + return ( + patient_form, + transfer_fields, + patient_row_index, + visits, + ) def create_instance(model, form): # We want to retain fields even if they're invalid so that we can return them to the user @@ -334,12 +239,99 @@ async def validate_rows_in_parallel(rows_by_patient, async_client): tasks = [] async with asyncio.TaskGroup() as tg: - for _, rows in visits_by_patient: + for _, rows in rows_by_patient: task = tg.create_task(validate_rows(rows, async_client)) tasks.append(task) return [task.result() for task in tasks] + # Code starts here.... 
+ + """" + Create the submission and save the csv file + """ + + # get the PDU object + # TODO #249 MRB: handle case where PDU does not exist + pdu = await PaediatricDiabetesUnit.objects.aget(pz_code=pdu_pz_code) + + # Set previous submission to inactive + if await Submission.objects.filter( + paediatric_diabetes_unit__pz_code=pdu.pz_code, + audit_year=date.today().year, + submission_active=True, + ).aexists(): + original_submission = await Submission.objects.filter( + submission_active=True, + paediatric_diabetes_unit__pz_code=pdu.pz_code, + audit_year=date.today().year, + ).aget() # there can be only one of these - store it in a variable in case we need to revert + else: + original_submission = None + + # Create new submission for the audit year + # It is not possble to create submissions in years other than the current year + try: + new_submission = await Submission.objects.acreate( + paediatric_diabetes_unit=pdu, + audit_year=date.today().year, + submission_date=timezone.now(), + submission_by=user, # user is the user who is logged in. Passed in as a parameter + submission_active=True, + ) + + if csv_file: + # save the csv file with a custom name + new_filename = ( + f"{pdu.pz_code}_{timezone.now().strftime('%Y%m%d_%H%M%S')}.csv" + ) + + # save=False so it doesn't try to save the parent, which would cause an error in an async context + # we save immediately after this anyway + new_submission.csv_file.save(new_filename, csv_file, save=False) + + await new_submission.asave() + + except Exception as e: + logger.error(f"Error creating new submission: {e}") + # the new submission was not created - no action required as the previous submission is still active + raise ValidationError( + { + "csv_upload": "Error creating new submission. The old submission has been restored." 
+ } + ) + + # now can delete all patients and visits from the previous active submission + if original_submission: + try: + original_submission_patient_count = await Patient.objects.filter( + submissions=original_submission + ).acount() + print( + f"Deleting patients from previous submission: {original_submission_patient_count}" + ) + await Patient.objects.filter(submissions=original_submission).adelete() + except Exception as e: + raise ValidationError( + {"csv_upload": "Error deleting patients from previous submission"} + ) + + # now can delete any previous active submission's csv file (if it exists) + # and remove the path from the field by setting it to None + # the rest of the submission will be retained + if original_submission: + original_submission.submission_active = False + try: + await original_submission.asave() # this action will delete the csv file also as per the save method in the model + except Exception as e: + raise ValidationError( + {"csv_upload": "Error deactivating previous submission"} + ) + + """ + Process the csv file and validate and save the data in the tables, parsing any errors + """ + + # Remember the original row number to help users find where the problem was in the CSV dataframe = dataframe.assign(row_index=np.arange(dataframe.shape[0])) @@ -352,7 +344,7 @@ async def validate_rows_in_parallel(rows_by_patient, async_client): async with httpx.AsyncClient() as async_client: validation_results_by_patient = await validate_rows_in_parallel( - visits_by_patient, async_client + rows_by_patient=visits_by_patient, async_client=async_client ) for ( @@ -360,7 +352,7 @@ async def validate_rows_in_parallel(rows_by_patient, async_client): transfer_fields, patient_row_index, first_row_field_errors, - visits, + parsed_visits, ) in validation_results_by_patient: # Errors parsing the Transfer or Patient fields for field, error in first_row_field_errors.items(): @@ -390,7 +382,7 @@ async def validate_rows_in_parallel(rows_by_patient, async_client): 
# We don't know what field caused the error so add to __all__ errors_to_return[patient_row_index]["__all__"].append(error) - for visit_form, visit_field_errors, visit_row_index in visits: + for visit_form, visit_field_errors, visit_row_index in parsed_visits: # Errors parsing the Visit fields for field, error in visit_field_errors.items(): errors_to_return[visit_row_index][field].append(error) diff --git a/project/npda/tests/test_csv_upload.py b/project/npda/tests/test_csv_upload.py index 5666200d..45c9c202 100644 --- a/project/npda/tests/test_csv_upload.py +++ b/project/npda/tests/test_csv_upload.py @@ -55,12 +55,13 @@ def mock_remote_calls(): @pytest.fixture def valid_df(dummy_sheets_folder): - file = dummy_sheets_folder / 'dummy_sheet.csv' + file = dummy_sheets_folder / "dummy_sheet.csv" return read_csv(file).df + @pytest.fixture def single_row_valid_df(dummy_sheets_folder): - file = dummy_sheets_folder / 'dummy_sheet.csv' + file = dummy_sheets_folder / "dummy_sheet.csv" df = read_csv(file).df df = df.head(1) @@ -70,31 +71,31 @@ def single_row_valid_df(dummy_sheets_folder): @pytest.fixture def one_patient_two_visits(dummy_sheets_folder): - file = dummy_sheets_folder / 'dummy_sheet.csv' + file = dummy_sheets_folder / "dummy_sheet.csv" df = read_csv(file).df df = df.head(2) - assert(df["NHS Number"][0] == df["NHS Number"][1]) + assert df["NHS Number"][0] == df["NHS Number"][1] return df @pytest.fixture def two_patients_first_with_two_visits_second_with_one(dummy_sheets_folder): - file = dummy_sheets_folder / 'dummy_sheet.csv' + file = dummy_sheets_folder / "dummy_sheet.csv" df = read_csv(file).df df = df.head(3) - assert(df["NHS Number"][0] == df["NHS Number"][1]) - assert(df["NHS Number"][2] != df["NHS Number"][0]) + assert df["NHS Number"][0] == df["NHS Number"][1] + assert df["NHS Number"][2] != df["NHS Number"][0] return df @pytest.fixture def two_patients_with_one_visit_each(dummy_sheets_folder): - file = dummy_sheets_folder / 'dummy_sheet.csv' + file = 
dummy_sheets_folder / "dummy_sheet.csv" df = read_csv(file).df df = df.drop([0]).head(2).reset_index(drop=True) @@ -134,6 +135,7 @@ def read_csv_from_str(contents): @pytest.mark.django_db def test_create_patient(test_user, single_row_valid_df): + csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() @@ -631,8 +633,8 @@ def test_spaces_in_date_column_name(test_user, dummy_sheet_csv): csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() - - assert(patient.date_of_birth == df["Date of Birth"][0].date()) + + assert patient.date_of_birth == df["Date of Birth"][0].date() @pytest.mark.django_db @@ -644,7 +646,7 @@ def test_different_column_order(test_user, single_row_valid_df): df = single_row_valid_df[columns] csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) - assert(Patient.objects.count() == 1) + assert Patient.objects.count() == 1 # TODO MRB: these should probably be calling the route directly? 
https://github.com/rcpch/national-paediatric-diabetes-audit/issues/353 @@ -656,7 +658,7 @@ def test_additional_columns_causes_error(test_user, single_row_valid_df): csv = single_row_valid_df.to_csv(index=False, date_format="%d/%m/%Y") additional_columns = read_csv_from_str(csv).additional_columns - assert(additional_columns == ["extra_one", "extra_two"]) + assert additional_columns == ["extra_one", "extra_two"] @pytest.mark.django_db @@ -671,16 +673,21 @@ def test_duplicate_columns_causes_error(test_user, single_row_valid_df): csv = csv.replace("Date of Birth_2", "Date of Birth") duplicate_columns = read_csv_from_str(csv).duplicate_columns - assert(duplicate_columns == ["NHS Number", "Date of Birth"]) + assert duplicate_columns == ["NHS Number", "Date of Birth"] @pytest.mark.django_db def test_missing_columns_causes_error(test_user, single_row_valid_df): - df = single_row_valid_df.drop(columns=['Urinary Albumin Level (ACR)', 'Total Cholesterol Level (mmol/l)']) + df = single_row_valid_df.drop( + columns=["Urinary Albumin Level (ACR)", "Total Cholesterol Level (mmol/l)"] + ) csv = df.to_csv(index=False, date_format="%d/%m/%Y") missing_columns = read_csv_from_str(csv).missing_columns - assert(missing_columns == ["Urinary Albumin Level (ACR)", "Total Cholesterol Level (mmol/l)"]) + assert missing_columns == [ + "Urinary Albumin Level (ACR)", + "Total Cholesterol Level (mmol/l)", + ] @pytest.mark.django_db @@ -694,7 +701,7 @@ def test_case_insensitive_column_headers(test_user, dummy_sheet_csv): df = read_csv_from_str(csv).df errors = csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) - assert(len(errors) == 0) + assert len(errors) == 0 @pytest.mark.django_db @@ -702,13 +709,13 @@ def test_mixed_case_column_headers(test_user, dummy_sheet_csv): csv = dummy_sheet_csv.replace("NHS Number", "NHS number") df = read_csv_from_str(csv).df - assert(df.columns[0] == "NHS Number") + assert df.columns[0] == "NHS Number" @pytest.mark.django_db def 
test_first_row_with_extra_cell_at_the_start(test_user, single_row_valid_df): csv = single_row_valid_df.to_csv(index=False, date_format="%d/%m/%Y") - + lines = csv.split("\n") lines[1] = "extra_value," + lines[1] @@ -721,7 +728,7 @@ def test_first_row_with_extra_cell_at_the_start(test_user, single_row_valid_df): @pytest.mark.django_db def test_first_row_with_extra_cell_on_the_end(test_user, single_row_valid_df): csv = single_row_valid_df.to_csv(index=False, date_format="%d/%m/%Y") - + lines = csv.split("\n") lines[1] += ",extra_value" @@ -734,7 +741,7 @@ def test_first_row_with_extra_cell_on_the_end(test_user, single_row_valid_df): @pytest.mark.django_db def test_second_row_with_extra_cell_at_the_start(test_user, one_patient_two_visits): csv = one_patient_two_visits.to_csv(index=False, date_format="%d/%m/%Y") - + lines = csv.split("\n") lines[2] = "extra_value," + lines[1] @@ -747,7 +754,7 @@ def test_second_row_with_extra_cell_at_the_start(test_user, one_patient_two_visi @pytest.mark.django_db def test_second_row_with_extra_cell_on_the_end(test_user, one_patient_two_visits): csv = one_patient_two_visits.to_csv(index=False, date_format="%d/%m/%Y") - + lines = csv.split("\n") lines[2] += ",extra_value" @@ -765,10 +772,10 @@ def test_upload_without_headers(test_user, one_patient_two_visits): lines = lines[1:] csv = "\n".join(lines) - + df = read_csv_from_str(csv).df csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) - assert(Patient.objects.count() == 1) - assert(Visit.objects.count() == 2) + assert Patient.objects.count() == 1 + assert Visit.objects.count() == 2