diff --git a/condensator.py b/condensator.py deleted file mode 100644 index 05e050f..0000000 --- a/condensator.py +++ /dev/null @@ -1,20 +0,0 @@ -import os -from pathlib import Path - -def write_codebase_to_file(root_dir, output_file): - with open(output_file, 'w') as outfile: - for subdir, _, files in os.walk(root_dir): - for file in files: - if file.endswith('.py'): - file_path = os.path.join(subdir, file) - print(file_path) - outfile.write(f"### FILE: {file_path} ###\n") - with open(file_path, 'r') as infile: - outfile.write(infile.read()) - outfile.write("\n\n") - -if __name__ == "__main__": - root_directory = '.' # Replace with the path to your codebase - output_filename = 'condensed_codebase.txt' # Replace with your desired output file name - write_codebase_to_file(root_directory, output_filename) - print(f"Codebase has been written to {output_filename}") \ No newline at end of file diff --git a/datalab/read_dataset.ipynb b/datalab/read_dataset.ipynb index 150b9cf..812fc37 100644 --- a/datalab/read_dataset.ipynb +++ b/datalab/read_dataset.ipynb @@ -37,7 +37,7 @@ "source": [ "proc_reader = datasets.load_data(chunksize=75835,\n", " source_path=TEST_DATA_DIR,\n", - " storage_path=\"./tmp\",\n", + " storage_path=\"./temp\",\n", " discretize=True,\n", " deep_supervision=True,\n", " task=\"DECOMP\")" diff --git a/model_templates/__init__.py b/model_templates/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/model_templates/mimic/.gitignore b/model_templates/mimic/.gitignore deleted file mode 100644 index d78ee4b..0000000 --- a/model_templates/mimic/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.ckpt -*.obj \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/decompensation/data_config.json b/model_templates/mimic/logistic_default/decompensation/data_config.json deleted file mode 100644 index 673d846..0000000 --- a/model_templates/mimic/logistic_default/decompensation/data_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "chunksize": 5000000, - "source_path": "mimic-iii", - "storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/decompensation/model_config.json b/model_templates/mimic/logistic_default/decompensation/model_config.json deleted file mode 100644 index 826c798..0000000 --- a/model_templates/mimic/logistic_default/decompensation/model_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "penalty": "l2", - "C": 0.001, - "solver": "liblinear", - "multi_class": "auto" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/decompensation/pipeline_config.json b/model_templates/mimic/logistic_default/decompensation/pipeline_config.json deleted file mode 100644 index 20bd54e..0000000 --- a/model_templates/mimic/logistic_default/decompensation/pipeline_config.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "model_name": "logistic_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - }, - "data_storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/in_hospital_mortality/data_config.json b/model_templates/mimic/logistic_default/in_hospital_mortality/data_config.json deleted file mode 100644 index 673d846..0000000 --- 
a/model_templates/mimic/logistic_default/in_hospital_mortality/data_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "chunksize": 5000000, - "source_path": "mimic-iii", - "storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/in_hospital_mortality/model_config.json b/model_templates/mimic/logistic_default/in_hospital_mortality/model_config.json deleted file mode 100644 index 826c798..0000000 --- a/model_templates/mimic/logistic_default/in_hospital_mortality/model_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "penalty": "l2", - "C": 0.001, - "solver": "liblinear", - "multi_class": "auto" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/in_hospital_mortality/pipeline_config.json b/model_templates/mimic/logistic_default/in_hospital_mortality/pipeline_config.json deleted file mode 100644 index 20bd54e..0000000 --- a/model_templates/mimic/logistic_default/in_hospital_mortality/pipeline_config.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "model_name": "logistic_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - }, - "data_storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/length_of_stay/data_config.json b/model_templates/mimic/logistic_default/length_of_stay/data_config.json deleted file mode 100644 index 673d846..0000000 --- a/model_templates/mimic/logistic_default/length_of_stay/data_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "chunksize": 5000000, - "source_path": "mimic-iii", - "storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/length_of_stay/model_config.json b/model_templates/mimic/logistic_default/length_of_stay/model_config.json deleted file mode 100644 index d8a1bfe..0000000 --- a/model_templates/mimic/logistic_default/length_of_stay/model_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "penalty": "l1", - "C": 0.1, - "solver": "liblinear", - "multi_class": "auto" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/length_of_stay/pipeline_config.json b/model_templates/mimic/logistic_default/length_of_stay/pipeline_config.json deleted file mode 100644 index 20bd54e..0000000 --- a/model_templates/mimic/logistic_default/length_of_stay/pipeline_config.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "model_name": "logistic_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - }, - "data_storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/phenotyping/data_config.json b/model_templates/mimic/logistic_default/phenotyping/data_config.json deleted file mode 100644 index 673d846..0000000 --- a/model_templates/mimic/logistic_default/phenotyping/data_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "chunksize": 5000000, - "source_path": "mimic-iii", - "storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/phenotyping/model_config.json 
b/model_templates/mimic/logistic_default/phenotyping/model_config.json deleted file mode 100644 index 2b87735..0000000 --- a/model_templates/mimic/logistic_default/phenotyping/model_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "penalty": "l2", - "C": 1e-5, - "solver": "sag", - "multi_class": "multinomial" -} \ No newline at end of file diff --git a/model_templates/mimic/logistic_default/phenotyping/pipeline_config.json b/model_templates/mimic/logistic_default/phenotyping/pipeline_config.json deleted file mode 100644 index 20bd54e..0000000 --- a/model_templates/mimic/logistic_default/phenotyping/pipeline_config.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "model_name": "logistic_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - }, - "data_storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_concept_drift_ehr/decompensation/history.json b/model_templates/mimic/lstm_concept_drift_ehr/decompensation/history.json deleted file mode 100644 index 9ea49d1..0000000 --- a/model_templates/mimic/lstm_concept_drift_ehr/decompensation/history.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "loss": [ - 0.20228461921215057, - 0.13717161118984222, - 0.12314295768737793, - 0.0942632183432579, - 0.07558345794677734 - ], - "auc_2": [ - 0.7597775459289551, - 0.886059045791626, - 0.9244322180747986, - 0.9601970911026001, - 0.9735322594642639 - ], - "auc_3": [ - 0.21838195621967316, - 0.5246959328651428, - 0.5290137529373169, - 0.7154638171195984, - 0.8079134821891785 - ], - "lr": [ - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513 - ], - "finished": true, - "score": [ - 0.28474223613739014, - 0.5637025833129883, - 0.027103759348392487 - ] -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_concept_drift_ehr/decompensation/loss.png b/model_templates/mimic/lstm_concept_drift_ehr/decompensation/loss.png deleted file mode 100644 index 50995d3..0000000 Binary files a/model_templates/mimic/lstm_concept_drift_ehr/decompensation/loss.png and /dev/null differ diff --git a/model_templates/mimic/lstm_concept_drit_bm/decompensation/history.json b/model_templates/mimic/lstm_concept_drit_bm/decompensation/history.json deleted file mode 100644 index 9de9956..0000000 --- a/model_templates/mimic/lstm_concept_drit_bm/decompensation/history.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "loss": [ - 0.18711377680301666, - 0.14152663946151733, - 0.11633744835853577, - 0.09917200356721878, - 0.11175470054149628 - ], - "auc": [ - 0.7586259841918945, - 0.8684936165809631, - 0.918787956237793, - 0.9313992261886597, - 0.9320964217185974 - ], - "auc_1": [ - 0.20057374238967896, - 0.3368876874446869, - 0.5368055105209351, - 0.6360592842102051, - 0.5482471585273743 - ], - "lr": [ - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513 - ], - "finished": true, - "score": [ - 0.12497211247682571, - 0.9087468385696411, - 0.4387904703617096 - ] -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_concept_drit_bm/decompensation/loss.png b/model_templates/mimic/lstm_concept_drit_bm/decompensation/loss.png deleted file mode 100644 index 6e66470..0000000 Binary files 
a/model_templates/mimic/lstm_concept_drit_bm/decompensation/loss.png and /dev/null differ diff --git a/model_templates/mimic/lstm_debug/decompensation/data_config.json b/model_templates/mimic/lstm_debug/decompensation/data_config.json deleted file mode 100644 index f9ad193..0000000 --- a/model_templates/mimic/lstm_debug/decompensation/data_config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "source_path": "mimic-iii-demo" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/decompensation/history.json b/model_templates/mimic/lstm_debug/decompensation/history.json deleted file mode 100644 index 032c7c4..0000000 --- a/model_templates/mimic/lstm_debug/decompensation/history.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "loss": [ - 0.20260387659072876, - 0.15685933828353882, - 0.1348450928926468 - ], - "auc": [ - 0.7056554555892944, - 0.7586597204208374, - 0.8155806660652161 - ], - "auc_1": [ - 0.10103190690279007, - 0.1985863745212555, - 0.3934503495693207 - ], - "val_loss": [ - 0.24352674186229706, - 0.25516465306282043, - 0.24636073410511017 - ], - "val_auc": [ - 0.8191491961479187, - 0.8457174897193909, - 0.8431413769721985 - ], - "val_auc_1": [ - 0.17559653520584106, - 0.27318406105041504, - 0.36032140254974365 - ], - "lr": [ - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513 - ] -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/decompensation/loss.png b/model_templates/mimic/lstm_debug/decompensation/loss.png deleted file mode 100644 index 1cdc9b1..0000000 Binary files a/model_templates/mimic/lstm_debug/decompensation/loss.png and /dev/null differ diff --git a/model_templates/mimic/lstm_debug/decompensation/model_config.json b/model_templates/mimic/lstm_debug/decompensation/model_config.json deleted file mode 100644 index f16a2b1..0000000 --- a/model_templates/mimic/lstm_debug/decompensation/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "DECOMP" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/decompensation/pipeline_config.json b/model_templates/mimic/lstm_debug/decompensation/pipeline_config.json deleted file mode 100644 index 7a913e4..0000000 --- a/model_templates/mimic/lstm_debug/decompensation/pipeline_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "model_name": "lstm_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - } -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/in_hospital_mortality/data_config.json b/model_templates/mimic/lstm_debug/in_hospital_mortality/data_config.json deleted file mode 100644 index f9ad193..0000000 --- a/model_templates/mimic/lstm_debug/in_hospital_mortality/data_config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "source_path": "mimic-iii-demo" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/in_hospital_mortality/model_config.json b/model_templates/mimic/lstm_debug/in_hospital_mortality/model_config.json deleted file mode 100644 index 2be53eb..0000000 --- a/model_templates/mimic/lstm_debug/in_hospital_mortality/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "IHM" -} \ No newline at end of 
file diff --git a/model_templates/mimic/lstm_debug/in_hospital_mortality/pipeline_config.json b/model_templates/mimic/lstm_debug/in_hospital_mortality/pipeline_config.json deleted file mode 100644 index 7a913e4..0000000 --- a/model_templates/mimic/lstm_debug/in_hospital_mortality/pipeline_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "model_name": "lstm_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - } -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/length_of_stay/data_config.json b/model_templates/mimic/lstm_debug/length_of_stay/data_config.json deleted file mode 100644 index f9ad193..0000000 --- a/model_templates/mimic/lstm_debug/length_of_stay/data_config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "source_path": "mimic-iii-demo" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/length_of_stay/model_config.json b/model_templates/mimic/lstm_debug/length_of_stay/model_config.json deleted file mode 100644 index 604e74c..0000000 --- a/model_templates/mimic/lstm_debug/length_of_stay/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "LOS" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/length_of_stay/pipeline_config.json b/model_templates/mimic/lstm_debug/length_of_stay/pipeline_config.json deleted file mode 100644 index 594337f..0000000 --- a/model_templates/mimic/lstm_debug/length_of_stay/pipeline_config.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "model_name": "lstm_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "LOS", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "accuracy" - ], - "loss": "categorical_crossentropy", - "run_eagerly": true - } -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/phenotyping/data_config.json b/model_templates/mimic/lstm_debug/phenotyping/data_config.json deleted file mode 100644 index f9ad193..0000000 --- a/model_templates/mimic/lstm_debug/phenotyping/data_config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "source_path": "mimic-iii-demo" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/phenotyping/model_config.json b/model_templates/mimic/lstm_debug/phenotyping/model_config.json deleted file mode 100644 index 33dbb28..0000000 --- a/model_templates/mimic/lstm_debug/phenotyping/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "PHENO" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_debug/phenotyping/pipeline_config.json b/model_templates/mimic/lstm_debug/phenotyping/pipeline_config.json deleted file mode 100644 index 968ea54..0000000 --- a/model_templates/mimic/lstm_debug/phenotyping/pipeline_config.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "model_name": "lstm_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auccuracy" - ], - "loss": "categorical_crossentropy", - "run_eagerly": true - } -} \ No newline at end 
of file diff --git a/model_templates/mimic/lstm_default/decompensation/data_config.json b/model_templates/mimic/lstm_default/decompensation/data_config.json deleted file mode 100644 index 673d846..0000000 --- a/model_templates/mimic/lstm_default/decompensation/data_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "chunksize": 5000000, - "source_path": "mimic-iii", - "storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/decompensation/model_config.json b/model_templates/mimic/lstm_default/decompensation/model_config.json deleted file mode 100644 index f16a2b1..0000000 --- a/model_templates/mimic/lstm_default/decompensation/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "DECOMP" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/decompensation/pipeline_config.json b/model_templates/mimic/lstm_default/decompensation/pipeline_config.json deleted file mode 100644 index 20bd54e..0000000 --- a/model_templates/mimic/lstm_default/decompensation/pipeline_config.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "model_name": "logistic_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - }, - "data_storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/in_hospital_mortality/data_config.json b/model_templates/mimic/lstm_default/in_hospital_mortality/data_config.json deleted file mode 100644 index 673d846..0000000 --- a/model_templates/mimic/lstm_default/in_hospital_mortality/data_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "chunksize": 5000000, - "source_path": "mimic-iii", - "storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/in_hospital_mortality/model_config.json b/model_templates/mimic/lstm_default/in_hospital_mortality/model_config.json deleted file mode 100644 index 2be53eb..0000000 --- a/model_templates/mimic/lstm_default/in_hospital_mortality/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "IHM" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/in_hospital_mortality/pipeline_config.json b/model_templates/mimic/lstm_default/in_hospital_mortality/pipeline_config.json deleted file mode 100644 index ef605f1..0000000 --- a/model_templates/mimic/lstm_default/in_hospital_mortality/pipeline_config.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "model_name": "lstm_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "IHM", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - }, - "data_storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/length_of_stay/data_config.json b/model_templates/mimic/lstm_default/length_of_stay/data_config.json deleted file mode 100644 index 63a94c7..0000000 --- a/model_templates/mimic/lstm_default/length_of_stay/data_config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "chunksize": 100000, - "source_path": "processed" -} \ No 
newline at end of file diff --git a/model_templates/mimic/lstm_default/length_of_stay/model_config.json b/model_templates/mimic/lstm_default/length_of_stay/model_config.json deleted file mode 100644 index 604e74c..0000000 --- a/model_templates/mimic/lstm_default/length_of_stay/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "LOS" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/length_of_stay/pipeline_config.json b/model_templates/mimic/lstm_default/length_of_stay/pipeline_config.json deleted file mode 100644 index 594337f..0000000 --- a/model_templates/mimic/lstm_default/length_of_stay/pipeline_config.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "model_name": "lstm_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "LOS", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "accuracy" - ], - "loss": "categorical_crossentropy", - "run_eagerly": true - } -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/phenotyping/data_config.json b/model_templates/mimic/lstm_default/phenotyping/data_config.json deleted file mode 100644 index 673d846..0000000 --- a/model_templates/mimic/lstm_default/phenotyping/data_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "chunksize": 5000000, - "source_path": "mimic-iii", - "storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/phenotyping/model_config.json b/model_templates/mimic/lstm_default/phenotyping/model_config.json deleted file mode 100644 index 33dbb28..0000000 --- a/model_templates/mimic/lstm_default/phenotyping/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "PHENO" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default/phenotyping/pipeline_config.json b/model_templates/mimic/lstm_default/phenotyping/pipeline_config.json deleted file mode 100644 index b3e671d..0000000 --- a/model_templates/mimic/lstm_default/phenotyping/pipeline_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "model_name": "lstm_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "PHENO", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auccuracy" - ], - "loss": "categorical_crossentropy", - "run_eagerly": true - }, - "data_storage_path": "processed" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default_full_set/decompensation/history.json b/model_templates/mimic/lstm_default_full_set/decompensation/history.json deleted file mode 100644 index 62b08d4..0000000 --- a/model_templates/mimic/lstm_default_full_set/decompensation/history.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "loss": [ - 0.16448844969272614, - 0.12855836749076843, - 0.11693630367517471, - 0.11558887362480164, - 0.10231443494558334 - ], - "auc": [ - 0.736911416053772, - 0.7901445031166077, - 0.8146975636482239, - 0.7950760126113892, - 0.8841459155082703 - ], - "auc_1": [ - 0.12818971276283264, - 0.2514507472515106, - 0.3828285336494446, - 0.41003990173339844, - 0.48639941215515137 - ], - "val_loss": [ - 0.2566990852355957, - 0.2745034992694855, - 0.25804397463798523, - 0.22911764681339264, - 0.22092939913272858 - ], - "val_auc": [ - 0.6909284591674805, - 0.708802342414856, - 
0.7741725444793701, - 0.8065164685249329, - 0.8210447430610657 - ], - "val_auc_1": [ - 0.11197873950004578, - 0.14602792263031006, - 0.1956888735294342, - 0.19884832203388214, - 0.33241981267929077 - ], - "lr": [ - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513 - ], - "finished": true, - "score": [ - 0.10656595975160599, - 0.8641465902328491, - 0.31452175974845886 - ] -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default_full_set/decompensation/loss.png b/model_templates/mimic/lstm_default_full_set/decompensation/loss.png deleted file mode 100644 index 1cdc9b1..0000000 Binary files a/model_templates/mimic/lstm_default_full_set/decompensation/loss.png and /dev/null differ diff --git a/model_templates/mimic/lstm_default_full_set/decompensation/model_config.json b/model_templates/mimic/lstm_default_full_set/decompensation/model_config.json deleted file mode 100644 index f16a2b1..0000000 --- a/model_templates/mimic/lstm_default_full_set/decompensation/model_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "layer_size": 128, - "input_dim": 59, - "depth": 1, - "dropout_rate": 0, - "task": "DECOMP" -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default_full_set/decompensation/pipeline_config.json b/model_templates/mimic/lstm_default_full_set/decompensation/pipeline_config.json deleted file mode 100644 index 7a913e4..0000000 --- a/model_templates/mimic/lstm_default_full_set/decompensation/pipeline_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "model_name": "lstm_default", - "batch_size": 8, - "epochs": 5, - "validation_fraction_split": 0.2, - "test_fraction_split": 0.2, - "task": "DECOMP", - "output_type": "one-hot", - "compiler_config": { - "optimizer": "adam", - "metrics": [ - "auc_2", - "auc_3" - ], - "loss": "binary_crossentropy", - "run_eagerly": true - } -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default_full_set/in_hospital_mortality/history.json b/model_templates/mimic/lstm_default_full_set/in_hospital_mortality/history.json deleted file mode 100644 index c3f324b..0000000 --- a/model_templates/mimic/lstm_default_full_set/in_hospital_mortality/history.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "loss": [ - 0.5743802785873413, - 0.3000965416431427, - 0.35824355483055115, - 0.20481464266777039, - 0.18548832833766937 - ], - "auc_2": [ - 0.6851851344108582, - 0.9472934603691101, - 0.8905817866325378, - 0.9750692844390869, - 0.945868968963623 - ], - "auc_3": [ - 0.6031047105789185, - 0.9104804992675781, - 0.8114237189292908, - 0.9600292444229126, - 0.9544141292572021 - ], - "val_loss": [ - 0.5127763748168945, - 0.4795614778995514, - 0.5308961868286133, - 0.48721742630004883, - 0.6639019846916199 - ], - "val_auc_2": [ - 0.7999999523162842, - 0.8230769038200378, - 0.8076923489570618, - 0.8461537957191467, - 0.800000011920929 - ], - "val_auc_3": [ - 0.4407389163970947, - 0.4736884832382202, - 0.47407811880111694, - 0.5039927959442139, - 0.5744826197624207 - ], - "lr": [ - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513 - ], - "finished": true -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default_full_set/length_of_stay/history.json b/model_templates/mimic/lstm_default_full_set/length_of_stay/history.json deleted file mode 100644 index 908f5d6..0000000 --- a/model_templates/mimic/lstm_default_full_set/length_of_stay/history.json +++ /dev/null @@ -1,38 +0,0 
@@ -{ - "loss": [ - 1.8137593269348145, - 1.576322317123413, - 1.398484468460083, - 1.2352243661880493, - 1.1073614358901978 - ], - "accuracy": [ - 0.390262633562088, - 0.4525088369846344, - 0.5181999802589417, - 0.5691906213760376, - 0.5985255837440491 - ], - "val_loss": [ - 2.2317676544189453, - 2.587902069091797, - 3.026679515838623, - 3.0577306747436523, - 3.3147239685058594 - ], - "val_accuracy": [ - 0.2864101231098175, - 0.2756941020488739, - 0.28300049901008606, - 0.27325865626335144, - 0.2756941020488739 - ], - "lr": [ - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513 - ], - "finished": true -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default_full_set/phenotyping/history.json b/model_templates/mimic/lstm_default_full_set/phenotyping/history.json deleted file mode 100644 index 6828ef2..0000000 --- a/model_templates/mimic/lstm_default_full_set/phenotyping/history.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "loss": [ - 12.334044456481934, - 14.08717155456543, - 14.669987678527832, - 14.633050918579102, - 15.505997657775879 - ], - "auc_4": [ - 0.6227800846099854, - 0.6862055659294128, - 0.6915063858032227, - 0.6911847591400146, - 0.6886156797409058 - ], - "auc_5": [ - 0.21828293800354004, - 0.25662341713905334, - 0.27799564599990845, - 0.26129111647605896, - 0.265255868434906 - ], - "val_loss": [ - 12.667723655700684, - 15.056920051574707, - 16.396257400512695, - 17.440183639526367, - 18.32649803161621 - ], - "val_auc_4": [ - 0.6413807272911072, - 0.6378331184387207, - 0.6439066529273987, - 0.6419544816017151, - 0.6429201364517212 - ], - "val_auc_5": [ - 0.23078219592571259, - 0.22082288563251495, - 0.2254050374031067, - 0.2135382741689682, - 0.21451467275619507 - ], - "lr": [ - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513, - 0.0010000000474974513 - ], - "finished": true, - "score": [ - 19.044710159301758, - 0.6460351943969727, - 0.26385894417762756 - ] -} \ No newline at end of file diff --git a/model_templates/mimic/lstm_default_full_set/phenotyping/loss.png b/model_templates/mimic/lstm_default_full_set/phenotyping/loss.png deleted file mode 100644 index 81e9623..0000000 Binary files a/model_templates/mimic/lstm_default_full_set/phenotyping/loss.png and /dev/null differ diff --git a/src/datasets/processors/__init__.py b/src/datasets/processors/__init__.py index 3b491b8..46cb3bd 100644 --- a/src/datasets/processors/__init__.py +++ b/src/datasets/processors/__init__.py @@ -426,6 +426,7 @@ def init(preprocessor): continue debug_io(f"Missing subject is: {subject_id}", verbose=orig_verbose) try: + # Try to replace the missing subject to meet target subj = exclud_subj.pop() res = chain(res, diff --git a/src/datasets/processors/discretizers.py b/src/datasets/processors/discretizers.py index 72c422e..6a50cf1 100644 --- a/src/datasets/processors/discretizers.py +++ b/src/datasets/processors/discretizers.py @@ -277,13 +277,14 @@ def _transform(self, X_df = self._categorize_data(X_df) X_df = self._bin_data(X_df, y_df) X_df = self._impute_data(X_df) - self._X[subject_id][stay_id] = X_df + # Torch friendly dtype + self._X[subject_id][stay_id] = X_df.astype(np.float32) if self._deep_supervision: y_reindexed = y_df.reindex(self._X[subject_id][stay_id].index) - self._y[subject_id][stay_id] = y_reindexed.fillna(0) - self._M[subject_id][stay_id] = (~y_reindexed.isna()).astype(int) + self._y[subject_id][stay_id] = y_reindexed.fillna(0).astype(np.float32) 
+ self._M[subject_id][stay_id] = (~y_reindexed.isna()).astype(np.int8) else: - self._y[subject_id][stay_id] = y_df + self._y[subject_id][stay_id] = y_df.astype(np.float32) # Based on y_dict not self._y so supervision mode agnostic n_samples = len(y_df) diff --git a/src/datasets/readers.py b/src/datasets/readers.py index e95d047..5543b2d 100644 --- a/src/datasets/readers.py +++ b/src/datasets/readers.py @@ -988,6 +988,8 @@ def to_numpy(self, normalize_inputs: bool = False, read_timestamps: bool = False, data_type=None, + bining: str = "none", + one_hot: bool = False, return_ids: bool = False, seed: int = 42): """ @@ -1037,6 +1039,10 @@ ValueError If `data_type` is not one of the possible data types (pd.DataFrame, np.ndarray, None). """ + if one_hot and bining == "none": + warn_io("One hot encoding is specified but no bining is applied." + " Ignoring one hot encoding.") + if subject_ids: if n_samples: warn_io("Both n_samples and subject_ids are specified. Ignoring n_samples.") @@ -1060,22 +1066,25 @@ dataset[prefix] = dataset[prefix][:min(n_samples, len(dataset[prefix]))] buffer_dataset = dict(zip(prefices, [[] for _ in range(len(prefices))])) - # TODO! buffer_dataset["M"] = [] n_samples = len(dataset["X"]) if not deep_supervision: for sample in range(n_samples): X_df, y_df = dataset["X"][sample], dataset["y"][sample] - X_dfs, y_dfs, ts = read_timeseries(X_df, y_df, dtype=pd.DataFrame) - # TODO! masks = [y_df] + X_dfs, y_dfs, ts = read_timeseries(X_df, + y_df, + bining=bining, + one_hot=one_hot, + dtype=pd.DataFrame) buffer_dataset["X"].extend(X_dfs) buffer_dataset["y"].extend(y_dfs) - # TODO! buffer_dataset["M"].extend(np.ones) + else: + raise NotImplementedError("apply bining here") dataset = buffer_dataset del buffer_dataset # Normalize lengths to the smallest timestamp - if deep_supervision and normalize_inputs: + if normalize_inputs: for idx in range(n_samples): length = min([int(dataset[prefix][idx].index[-1]) \ for prefix in prefices]) diff --git a/src/metrics/__init__.py b/src/metrics/__init__.py index a42fed9..9fb8ab5 100644 --- a/src/metrics/__init__.py +++ b/src/metrics/__init__.py @@ -20,10 +20,10 @@ class CustomBins: def get_bin_custom(x, one_hot=False): index = bisect.bisect_right(CustomBins.lower_bounds, x) - 1 if one_hot: - ret = np.zeros((CustomBins.nbins,)) + ret = np.zeros((CustomBins.nbins,), dtype=np.int8) ret[index] = 1 return ret - return index + return np.int8(index) class LogBins: @@ -41,7 +41,7 @@ def get_bin_log(x, nbins=10, one_hot=False): binid = nbins - 1 if one_hot: - ret = np.zeros((LogBins.nbins,)) + ret = np.zeros((LogBins.nbins,), dtype=np.int8) ret[binid] = 1 return ret return binid diff --git a/src/metrics/pytorch.py b/src/metrics/pytorch.py index 74dface..5df876d 100644 --- a/src/metrics/pytorch.py +++ b/src/metrics/pytorch.py @@ -1,29 +1,43 @@ -from torcheval.metrics import BinaryAUPRC, MulticlassAUPRC, MultilabelAUPRC +from torch import Tensor +from typing import Type, Literal, Optional, Union, List, Any +from torcheval.metrics import BinaryAUPRC, MulticlassAUPRC, MultilabelAUPRC, metric +from torchmetrics import AUROC as _AUROC from copy import deepcopy # TODO! 
This absolutely needs testing -class AUCPRC(object): +class AUPRC(metric.Metric[Tensor]): - def __init__(self, task: str, num_classes: int = 1): - if task == "binary": - self.metric = BinaryAUPRC() + def __new__(cls, + task: Literal["binary", "multiclass", "multilabel"], + num_labels: int = 1, + average: Literal["macro", "weighted", "none", "micro"] = "macro"): + if average not in ["macro", "micro", "none"]: + raise ValueError("Average must be one of 'macro', 'micro', or 'none'" + f" but is {average}") + + if task == "binary" or average == "micro": + metric = BinaryAUPRC() elif task == "multiclass": - self.metric = MulticlassAUPRC(num_classes=num_classes) + # Some debate online, but in torcheval this is one-vs-all + metric = MulticlassAUPRC(num_classes=num_labels, average=average) elif task == "multilabel": - self.metric = MultilabelAUPRC(num_labels=num_classes) + # This is multiple positives allowed + metric = MultilabelAUPRC(num_labels=num_labels, average=average) else: raise ValueError("Unsupported task type or activation function") - self._task = task + metric._task = task + metric._average = average + + return metric def update(self, predictions, labels): # Reshape predictions and labels to handle the batch dimension - if self._task == "binary": + if self._task == "binary" or self._average == "micro": predictions = predictions.view(-1) labels = labels.view(-1) - else: - predictions = predictions.view(-1, predictions.shape[-1]) + elif self._task == "multiclass": labels = labels.view(-1) self.metric.update(predictions, labels) @@ -33,36 +47,141 @@ def to(self, device): self.metric = self.metric.to(device) return self - def __getattr__(self, name): - # Redirect attribute access to self.metric if it exists there - if hasattr(self.metric, name) and not name in ["update", "to", "__dict__"]: - return getattr(self.metric, name) - # if name in self.__dict__: - # return self.__dict__[name] - # raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") - - def __setattr__(self, name, value): - if name in ['_task', 'metric']: - # Set attributes normally if they are part of the AUCPRC class - super().__setattr__(name, value) - elif hasattr(self, 'metric') and hasattr(self.metric, name): - # Redirect attribute setting to self.metric if it exists there - setattr(self.metric, name, value) - else: - # Set attributes normally otherwise - super().__setattr__(name, value) - def __deepcopy__(self, memo): - # Create a new instance of the class - cls = self.__class__ - result = cls.__new__(cls) - memo[id(self)] = result +class AUROC(_AUROC): + + def __new__( + cls: Type["_AUROC"], + task: Literal["binary", "multiclass", "multilabel"], + thresholds: Optional[Union[int, List[float], Tensor]] = None, + num_labels: Optional[int] = None, + average: Optional[Literal["macro", "weighted", "none", "micro"]] = "macro", + max_fpr: Optional[float] = None, + ignore_index: Optional[int] = None, + validate_args: bool = True, + ): + if average == "micro" and task == "multilabel": + task = "binary" + + metric = super().__new__(cls, + task=task, + thresholds=thresholds, + num_classes=num_labels, + num_labels=num_labels, + average="none" if average == "micro" else average, + max_fpr=max_fpr, + ignore_index=ignore_index, + validate_args=validate_args) + metric._average = average + return metric + + # You might want to override update and compute methods if needed + def update(self, input: Tensor, target: Tensor, weight: Tensor = None, *args, **kwargs) -> None: + if self._average == "micro": + target = 
target.view(-1) + input = input.view(-1) + return super().update(input, target, weight, *args, **kwargs) # call the parent, not self, to avoid infinite recursion + + def compute(self) -> Tensor: + return super().compute() # likewise, self.compute() would recurse forever + + +if __name__ == "__main__": + # Multi-label classification data + import torch + # sklearn metrics for comparison + import numpy as np + from sklearn.metrics import roc_auc_score, precision_recall_curve, auc + + # + y_true_multi = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1], + [1, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 1]]).int() + y_pred_multi = torch.Tensor([[0.8, 0.1, 0.1], [0.2, 0.7, 0.1], [0.1, 0.3, 0.6], [0.7, 0.2, 0.1], + [0.1, 0.6, 0.3], [0.2, 0.1, 0.7], [0.6, 0.3, 0.1], [0.3, 0.5, 0.2], + [0.2, 0.1, 0.7], [0.7, 0.2, 0.1]]) + + # ---------------- Comparing Micro-Macro ROC AUC using torch with sklearn -------------------- + print("--- Comparing Micro-Macro ROC AUC ---") + + # Compute ours: micro + micro_rocauc = AUROC(task="multilabel", average="micro", num_labels=3) + micro_rocauc.update(y_pred_multi, y_true_multi) + print("Micro AUCROC (torch):", micro_rocauc.compute()) + + # Compute ours: macro + macro_rocauc = AUROC(task="multilabel", average="macro", num_labels=3) + macro_rocauc.update(y_pred_multi, y_true_multi) + print("Macro AUCROC (torch):", macro_rocauc.compute()) + + # Compute theirs + # Flatten y_true_multi as numpy + y_true_multi_flat = y_true_multi.numpy().flatten() + y_pred_multi_flat = y_pred_multi.numpy().flatten() + + # Compute micro-average ROC AUC using sklearn + micro_rocauc_sklearn = roc_auc_score(y_true_multi, + y_pred_multi, + average='micro', + multi_class='ovr') + print(f'Micro-average auc-roc (sklearn): {micro_rocauc_sklearn:.4f}') + + # Compute macro-average ROC AUC using sklearn + macro_rocauc_sklearn = roc_auc_score(y_true_multi, + y_pred_multi, + average='macro', + multi_class='ovr') + print(f'Macro-average auc-roc (sklearn): {macro_rocauc_sklearn:.4f}') + + # ---------------- Comparing Micro-Macro PR AUC using torch with sklearn -------------------- + print("--- Comparing Micro-Macro PR AUC ---") + micro_prauc = AUPRC(task="multilabel", num_labels=3, average="micro") + macro_prauc = AUPRC(task="multilabel", num_labels=3, average="macro") + + # Compute ours + for idx in range(len(y_true_multi)): + yt = y_true_multi[idx, :].unsqueeze(0) + yp = y_pred_multi[idx, :].unsqueeze(0) + micro_prauc.update(yp, yt) + macro_prauc.update(yp, yt) + + print("Micro AUCPR (torch):", micro_prauc.compute()) + print("Macro AUCPR (torch):", macro_prauc.compute()) + + # Compute theirs + roc_pr_list = [] + roc_auc_list = [] + + # Iterate over each class + for i in range(y_true_multi.shape[1]): + y_true = y_true_multi[:, i] + y_pred = y_pred_multi[:, i] + + # Compute precision-recall curve + precision, recall, _ = precision_recall_curve(y_true, y_pred) + roc_pr_list.append(auc(recall, precision)) + + # Compute ROC AUC score + roc_auc = roc_auc_score(y_true, y_pred) + roc_auc_list.append(roc_auc) + + print(f"PR AUC macro Score (sklearn): {np.mean(roc_pr_list)}") + + precision, recall, _ = precision_recall_curve(y_true_multi_flat, y_pred_multi_flat) + pr_auc = auc(recall, precision) - # Copy the _task attribute - setattr(result, '_task', deepcopy(self._task, memo)) + # Print results + print(f"PR AUC micro Score (sklearn): {pr_auc}") + print() + # ---------------- Comparing Binary PR AUC using torch with sklearn -------------------- + prauc = AUPRC(task="binary") + prauc.update(y_pred_multi.flatten(), y_true_multi.flatten()) - # Deep copy the metric attribute - if hasattr(self, 
'metric'): - setattr(result, 'metric', deepcopy(self.metric, memo)) + print("Binary AUCPR (torch):", prauc.compute()) + from torcheval.metrics.functional import binary_auprc + functional_pr_auc = binary_auprc(y_pred_multi.flatten(), y_true_multi.flatten()) + print("Binary AUCPR functional (torch):", functional_pr_auc) + precision, recall, _ = precision_recall_curve(y_true_multi_flat, y_pred_multi_flat) + pr_auc = auc(recall, precision) - return result + # Print results + print(f"Binary PRAUC Score (sklearn): {pr_auc}") diff --git a/src/metrics/stream.py b/src/metrics/stream.py index eff40d3..c90afb5 100644 --- a/src/metrics/stream.py +++ b/src/metrics/stream.py @@ -116,8 +116,6 @@ def safe_div(a, b): class MicroROCAUC(river_metrics.ROCAUC): def __init__(self, n_thresholds=10, pos_val=True): - self._y_true_all = [] - self._y_pred_all = [] super().__init__(n_thresholds=n_thresholds, pos_val=pos_val) def update(self, y_true: dict, y_pred: dict): @@ -137,8 +135,8 @@ def works_with(self, y_true: dict, y_pred: dict): class MacroROCAUC(river_metrics.base.MultiClassMetric): def __init__(self, n_thresholds=10, pos_val=True): - self._n_thresholds = n_thresholds - self._pos_val = pos_val + self._n_thresholds = n_thresholds # TODO! does nothing + self._pos_val = pos_val # TODO! does nothing self._per_class_rocaucs = collections.defaultdict(river_metrics.ROCAUC) self._classes = set() diff --git a/src/models/pytorch/__init__.py b/src/models/pytorch/__init__.py index aa34756..183fa6f 100644 --- a/src/models/pytorch/__init__.py +++ b/src/models/pytorch/__init__.py @@ -4,6 +4,7 @@ import numpy as np import pickle import warnings +from types import FunctionType from typing import List, Tuple from copy import deepcopy from typing import Union, Dict, overload, Optional @@ -126,7 +127,7 @@ def _get_metrics(self, metrics: Dict[str, Metric]) -> List[Tuple[str, float]]: def _init_metrics(self, metrics, prefices: list = ["train", "val", "test"]) -> Dict[str, Metric]: - settings = {"task": self._task, "num_classes": self._num_classes} + settings = {"task": self._task, "num_labels": self._num_classes} return_metrics = {"loss": {"obj": None, "value": 0.0}} # Create the base metric dict @@ -142,7 +143,7 @@ except: metric_name = "unknown" # If type, instantiate - if isinstance(metric, type): + if isinstance(metric, (type, FunctionType)): metric = metric(**settings) return_metrics[metric_name] = {"obj": metric.to(self._device), "value": 0.0} @@ -362,7 +363,7 @@ data_size = x.shape[0] idx = np.random.permutation(len(x)) x = torch.tensor(x[idx, :, :], dtype=torch.float32).to(self._device) - y = torch.tensor(y[idx, :, :], dtype=torch.float32).to(self._device) + y = torch.tensor(y[idx, :, :]).to(self._device) self._on_epoch_start(data_size, batch_size, has_val) @@ -387,17 +388,23 @@ if len(input.shape) < 3: input = input.unsqueeze(0) - if len(label.shape) < 3: - label = label.unsqueeze(0) + # if len(label.shape) < 3: + # label = label.unsqueeze(0) # labels = labels * masks output = self(input, masks=mask) if masking_flag: output = torch.masked_select(output, mask) label = torch.masked_select(label, mask) - # Accumulate outputs and labels - aggr_outputs.append(output.view(-1)) - aggr_labels.append(label.view(-1)) + # Accumulate outputs and labels either flat or with the multilabel dim + aggr_outputs.append(output) + aggr_labels.append(label) + #aggr_outputs.append( + # output.view(*(-1,) if self._num_classes == 1 else (-1, self._num_classes),)) + #aggr_labels.append(
+ # label.view( + # *(-1,) if self._num_classes == 1 or self._final_activation == "softmax" else + # (-1, self._num_classes),)) # Optimizer network on batch aggr_outputs, \ @@ -449,7 +456,7 @@ output = torch.masked_select(output, mask) label = torch.masked_select(label, mask) # Accumulate outputs and labels - aggr_outputs.append(output.view(-1)) + aggr_outputs.append(output.view(-1, self._num_classes)) aggr_labels.append(label.view(-1)) # Optimizer network on batch @@ -469,7 +476,12 @@ def _optimize_batch(self, outputs: list, labels: list, finalize: bool = False): if self._sample_count >= self._batch_size: # Concatenate accumulated outputs and labels outputs = torch.cat(outputs) - labels = torch.cat(labels) + + # If multilabel, labels are one-hot, else they are sparse + if self._task == "multilabel": + labels = torch.cat(labels, axis=1).T + else: + labels = torch.cat(labels) # Compute loss loss = self._loss(outputs, labels) diff --git a/src/models/pytorch/mappings.py b/src/models/pytorch/mappings.py index b154742..5f5db62 100644 --- a/src/models/pytorch/mappings.py +++ b/src/models/pytorch/mappings.py @@ -1,7 +1,7 @@ import torch.optim as optim import torch.nn as nn import torchmetrics -from metrics.pytorch import AUCPRC +from metrics.pytorch import AUPRC, AUROC __all__ = ["optimizer_mapping", "loss_mapping", "metric_mapping", "activation_mapping"] @@ -43,7 +43,10 @@ "mape": torchmetrics.MeanAbsolutePercentageError, "confusion_matrix": torchmetrics.ConfusionMatrix, "cohen_kappa": torchmetrics.CohenKappa, - "pr_auc": AUCPRC + "micro_roc_auc": lambda *args, **kwargs: AUROC(*args, average="micro", **kwargs), + "macro_roc_auc": lambda *args, **kwargs: AUROC(*args, average="macro", **kwargs), + "micro_pr_auc": lambda *args, **kwargs: AUPRC(*args, average="micro", **kwargs), + "macro_pr_auc": lambda *args, **kwargs: AUPRC(*args, average="macro", **kwargs), } activation_mapping = {"sigmoid": nn.Sigmoid(), "softmax": nn.Softmax(dim=-1), "relu": nn.ReLU()} diff --git a/src/models/tf2/lstm.py b/src/models/tf2/lstm.py index 02f84ed..e8d4045 100644 --- a/src/models/tf2/lstm.py +++ b/src/models/tf2/lstm.py @@ -29,9 +29,16 @@ def __init__(self, self.recurrent_dropout = recurrent_dropout self.depth = depth - if not final_activation in activation_names: - raise ValueError(f"Activation function {final_activation} not supported. " - f"Must be one of {*activation_names,}") + if final_activation is None: + if output_dim == 1: + self._final_activation = "sigmoid" + else: + self._final_activation = "softmax" + else: + if not final_activation in activation_names: + raise ValueError(f"Activation function {final_activation} not supported. 
" + f"Must be one of {*activation_names,}") if isinstance(layer_size, int): self._hidden_sizes = [layer_size] * depth diff --git a/src/models/tf2/mappings.py b/src/models/tf2/mappings.py index f7e3bdf..955cc10 100644 --- a/src/models/tf2/mappings.py +++ b/src/models/tf2/mappings.py @@ -7,8 +7,8 @@ ] metric_mapping = { - "roc_auc": metrics.AUC(50, curve="ROC"), - "pr_auc": metrics.AUC(50, curve="PR"), + "roc_auc": lambda *args, **kwargs: metrics.AUC(*args, curve="ROC", **kwargs), + "pr_auc": lambda *args, **kwargs: metrics.AUC(*args, curve="ROC", **kwargs), "accuracy": "accuracy", "acc": "acc", "binary_accuracy": "binary_accuracy", diff --git a/src/settings.py b/src/settings.py index 7713891..9e6c2cd 100644 --- a/src/settings.py +++ b/src/settings.py @@ -2,7 +2,6 @@ import os from pathlib import Path from dotenv import load_dotenv -from utils.IO import * load_dotenv(verbose=False) diff --git a/src/utils/__init__.py b/src/utils/__init__.py index 314647f..deda4f5 100644 --- a/src/utils/__init__.py +++ b/src/utils/__init__.py @@ -10,7 +10,6 @@ import numpy as np import pandas as pd from typing import Dict, Union -from multipledispatch import dispatch from metrics import CustomBins, LogBins from utils.IO import * from pathlib import Path @@ -31,13 +30,14 @@ def read_timeseries(X_df: pd.DataFrame, y_df: pd.DataFrame, row_only=False, bining="none", + one_hot=False, dtype=np.ndarray): if bining == "log": - y = y_df.applymap(LogBins.get_bin_log) + y_df = y_df.apply(lambda x: LogBins.get_bin_log(x, one_hot=one_hot)) elif bining == "custom": - y = y_df.applymap(CustomBins.get_bin_custom) + y_df = y_df.apply(lambda x: CustomBins.get_bin_custom(x, one_hot=one_hot)) else: - y = y_df + y_df = y_df.astype(np.float32) if row_only: Xs = [ @@ -50,8 +50,7 @@ def read_timeseries(X_df: pd.DataFrame, for timestamp in y_df.index ] - indices = np.random.permutation(len(Xs)) - ys = y.squeeze(axis=1).values.tolist() + ys = y_df.squeeze(axis=1).values ts = y_df.index.tolist() return Xs, ys, ts diff --git a/tests/etc/convert_columns.py b/tests/etc/benchmark_scripts/convert_columns.py similarity index 100% rename from tests/etc/convert_columns.py rename to tests/etc/benchmark_scripts/convert_columns.py diff --git a/tests/etc/discretize_data.py b/tests/etc/benchmark_scripts/discretize_data.py similarity index 100% rename from tests/etc/discretize_data.py rename to tests/etc/benchmark_scripts/discretize_data.py diff --git a/tests/etc/engineer_data.py b/tests/etc/benchmark_scripts/engineer_data.py similarity index 100% rename from tests/etc/engineer_data.py rename to tests/etc/benchmark_scripts/engineer_data.py diff --git a/tests/etc/rename_files.py b/tests/etc/benchmark_scripts/rename_files.py similarity index 100% rename from tests/etc/rename_files.py rename to tests/etc/benchmark_scripts/rename_files.py diff --git a/tests/etc/revert_split.py b/tests/etc/benchmark_scripts/revert_split.py similarity index 100% rename from tests/etc/revert_split.py rename to tests/etc/benchmark_scripts/revert_split.py diff --git a/tests/etc/setup.sh b/tests/etc/setup.sh index e6f0973..c3bbfb3 100644 --- a/tests/etc/setup.sh +++ b/tests/etc/setup.sh @@ -31,7 +31,7 @@ testFolder=$(dirname $(dirname $SCRIPT)) # Download the MIMIC-III demo dataset from the web destinationDir="$testFolder/data/" -convertScript="$testFolder/etc/convert_columns.py" +convertScript="$testFolder/etc/benchmark_scriptsconvert_columns.py" if [ ! -d "$destinationDir/physionet.org" ]; then echo "Downloading the MIMIC-III demo dataset directory..." 
@@ -58,10 +58,10 @@ if [ ! -d "$generatedDir" ]; then git clone "https://github.com/YerevaNN/mimic3-benchmarks.git" $generatedDir fi -renameScript="$testFolder/etc/rename_files.py" -revertSplitScript="$testFolder/etc/revert_split.py" -engineScript="$testFolder/etc/engineer_data.py" -discretizerScript="$testFolder/etc/discretize_data.py" +renameScript="$testFolder/etc/benchmark_scripts/rename_files.py" +revertSplitScript="$testFolder/etc/benchmark_scripts/revert_split.py" +engineScript="$testFolder/etc/benchmark_scripts/engineer_data.py" +discretizerScript="$testFolder/etc/benchmark_scripts/discretize_data.py" # Change into the MIMIC-III benchmarks directory currentDir=$(pwd) diff --git a/tests/msettings.py b/tests/msettings.py new file mode 100644 index 0000000..bd59460 --- /dev/null +++ b/tests/msettings.py @@ -0,0 +1,221 @@ +from dotenv import load_dotenv +from settings import * + +load_dotenv(verbose=False) + +__all__ = [ + 'NETWORK_METRICS', + 'NETWORK_CRITERIONS', + 'GENERATOR_OPTIONS', + 'FINAL_ACTIVATIONS', + 'OUTPUT_DIMENSIONS', + 'STANDARD_LSTM_PARAMS', + 'STANDARD_LSTM_DS_PARAMS', + 'CHANNEL_WISE_LSTM_PARAMS', + 'CHANNEL_WISE_LSTM_DS_PARAMS', + 'MULTITASK_STANDARD_LSTM_PARAMS', + 'MULTITASK_CHANNEL_WISE_LSTM_PARAMS', +] + +# ------------------------- metric settings ------------------------- +# Metrics depend on the task +NETWORK_METRICS = { + "IHM": ["roc_auc", "pr_auc"], + "DECOMP": [ + "roc_auc", + "pr_auc", + ], + "LOS": ["cohen_kappa", "mae"], + "PHENO": ["micro_roc_auc", "macro_roc_auc"] +} + +NETWORK_CRITERIONS = { + "IHM": "binary_crossentropy", + "DECOMP": "binary_crossentropy", + "LOS": "categorical_crossentropy", # Is multiclass + "PHENO": "binary_crossentropy" # Is multilabel +} + +GENERATOR_OPTIONS = { + "IHM": {}, + "DECOMP": {}, + "LOS": { + "bining": + "custom" # Can be "custom", "log" or "none"; controls how the target is binned + }, + "PHENO": {}, +} + +FINAL_ACTIVATIONS = { + "IHM": "sigmoid", + "DECOMP": "sigmoid", + "LOS": "softmax", # if partition is "none" then relu (no negative remaining LOS) + "PHENO": "sigmoid", +} + +OUTPUT_DIMENSIONS = { + "IHM": 1, + "DECOMP": 1, + "LOS": 10, # if partition is "none" then only 1 + "PHENO": 25, +} + +# ------------------------- standard lstm settings -------------------- +# Optimizer is always adam lr=0.001, beta=0.99 +# Batch size is always 8 +# Input dim is always 59 +# Batch size is at top level since it's needed for the generator (tf2) or fit method +STANDARD_LSTM_PARAMS = { + "IHM": { # Settings for the in-hospital mortality task + "model": { + "layer_size": 16, + "depth": 2 + } + }, + "DECOMP": { # Settings for the decompensation task + "model": { + "layer_size": 128, + "depth": 1 + } + }, + "LOS": { # Settings for the length of stay task + "model": { + "layer_size": 64, + "depth": 1 + } + }, + "PHENO": { # Settings for the phenotyping task + "model": { + "layer_size": 256, + "depth": 1 + } + } +} + +# ------------------------- standard lstm with deep supervision settings -------------------- +STANDARD_LSTM_DS_PARAMS = { + "PHENO": { + "model": { + "layer_size": 256, + "depth": 1, + "target_repl_coef": 0.5 + } + }, + "DECOMP": { + "model": { + "layer_size": 128, + "depth": 1, + } + }, + "IHM": { + "model": { + "layer_size": 32, + "depth": 1, + "target_repl_coef": 0.5 + } + }, + "LOS": { + "model": { + "layer_size": 128, + "depth": 1 + } + } +} + +# ------------------------- channel wise lstm settings -------------------- +CHANNEL_WISE_LSTM_PARAMS = { + "IHM": { # Settings for the in-hospital mortality task + "model": {
"layer_size": 8, + "depth": 1, + "size_coef": 4.0 + } + }, + "DECOMP": { # Settings for the decompensation task + "model": { + "layer_size": 16, + "depth": 1, + "size_coef": 4.0 + } + }, + "LOS": { # Settings for the length of stay task + "model": { + "layer_size": 16, + "depth": 1, + "size_coef": 8.0 + } + }, + "PHENO": { # Settings for the phenotyping task + "model": { + "layer_size": 16, + "depth": 1, + "size_coef": 8.0 + } + } +} + +# ------------------------- channel-wise lstm with deep supervision settings -------------------- +CHANNEL_WISE_LSTM_DS_PARAMS = { + "PHENO": { + "model": { + "layer_size": 16, + "depth": 1, + "size_coef": 8.0, + "target_repl_coef": 0.5 + } + }, + "DECOMP": { + "model": { + "layer_size": 16, + "depth": 1, + "size_coef": 8.0, + "deep_supervision": True + } + }, + "IHM": { + "model": { + "layer_size": 16, + "depth": 1, + "size_coef": 4.0, + "target_repl_coef": 0.5 + } + }, + "LOS": { + "model": { + "layer_size": 16, + "depth": 1, + "size_coef": 8.0, + "deep_supervision": True + } + } +} + +# ------------------------- multitask lstm settings -------------------- +MULTITASK_STANDARD_LSTM_PARAMS = { + "dim": 512, + "depth": 1, + "dropout": 0.3, + "batch_size": 8, + "timestep": 1.0, + "partition": "custom", + "ihm_C": 0.2, + "decomp_C": 1.0, + "los_C": 1.5, + "pheno_C": 1.0, + "target_repl_coef": 0.5 +} + +MULTITASK_CHANNEL_WISE_LSTM_PARAMS = { + "dim": 16, + "size_coef": 8.0, + "depth": 1, + "dropout": 0.3, + "batch_size": 8, + "timestep": 1.0, + "partition": "custom", + "ihm_C": 0.2, + "decomp_C": 1.0, + "los_C": 1.5, + "pheno_C": 1.0, + "target_repl_coef": 0.5 +} diff --git a/tests/test_models/test_pytorch/test_lstm.py b/tests/test_models/test_pytorch/test_lstm.py index 7ad5c3b..c58890a 100644 --- a/tests/test_models/test_pytorch/test_lstm.py +++ b/tests/test_models/test_pytorch/test_lstm.py @@ -1,7 +1,6 @@ import datasets import pytest -import ray -import multiprocessing as mp +import json from utils.IO import * from datasets.readers import ProcessedSetReader from typing import Dict @@ -12,6 +11,7 @@ from preprocessing.scalers import MinMaxScaler from generators.pytorch import TorchGenerator from models.pytorch.lstm import LSTMNetwork +from tests.msettings import * @pytest.mark.parametrize("data_flavour", ["generator", "numpy"]) @@ -29,14 +29,29 @@ def test_torch_lstm_with_deep_supervision( scaler = MinMaxScaler().fit_reader(reader) # -- Create the model -- - model = LSTMNetwork(128, 59, output_dim=1) + # Parameters + output_dim = OUTPUT_DIMENSIONS[task_name] + final_activation = FINAL_ACTIVATIONS[task_name] + model_dimensions = STANDARD_LSTM_DS_PARAMS[task_name]["model"] + # Obj + model = LSTMNetwork(input_dim=59, + output_dim=output_dim, + final_activation=final_activation, + **model_dimensions) # -- Compile the model -- - # criterion = nn.BCEWithLogitsLoss() - criterion = nn.BCELoss() - optimizer = optim.RMSprop(model.parameters(), lr=0.001) - model.compile(optimizer=optimizer, loss=criterion, metrics=["roc_auc", "pr_auc"]) - tests_io("Succeeded in creating the model") + # TODO! 
diff --git a/tests/test_models/test_tf2/test_lstm.py b/tests/test_models/test_tf2/test_lstm.py
index c1aa70b..c3dadb7 100644
--- a/tests/test_models/test_tf2/test_lstm.py
+++ b/tests/test_models/test_tf2/test_lstm.py
@@ -80,24 +80,45 @@ def test_tf2_lstm(
     scaler = MinMaxScaler().fit_reader(reader)
 
     # -- Create the model --
-    model = LSTMNetwork(128, 59, recurrent_dropout=0., output_dim=1, final_activation='sigmoid')
+    if task_name in ["IHM", "DECOMP"]:
+        output_dim = 1
+    elif task_name == "LOS":
+        output_dim = 10
+    elif task_name == "PHENO":
+        output_dim = 25
+    model = LSTMNetwork(128, 59, recurrent_dropout=0., output_dim=output_dim)
 
     # -- Compile the model --
+    if task_name in ["IHM", "DECOMP"]:
+        loss = "binary_crossentropy"
+    elif task_name in ["LOS", "PHENO"]:
+        loss = "categorical_crossentropy"
+
     optimizer = Adam(learning_rate=0.001, clipvalue=1.0)
-    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=["roc_auc", "pr_auc"])
+    model.compile(optimizer=optimizer, loss=loss, metrics=["roc_auc", "pr_auc"])
     tests_io("Succeeded in creating the model")
 
     # -- fit --
     if data_flavour == "generator":
         # -- Create the generator --
         train_generator = TFGenerator(reader=reader, scaler=scaler, batch_size=8, shuffle=True)
-        history = model.fit(train_generator, epochs=5)
         tests_io("Succeeded in creating the generator")
+
+        # -- Fitting the model --
+        history = model.fit(train_generator, epochs=5)
+
    elif data_flavour == "numpy":
         # -- Create the dataset --
-        dataset = reader.to_numpy(scaler=scaler)
+        tests_io("Loading the numpy dataset...", end="\r")
+        # Binned with custom bins on the LOS task
+        dataset = reader.to_numpy(scaler=scaler,
+                                  bining="custom" if task_name == "LOS" else "none",
+                                  one_hot=task_name == "LOS")
+        tests_io("Done loading the numpy dataset")
+
+        # -- Fitting the model --
         history = model.fit(dataset["X"], dataset["y"], batch_size=8, epochs=10)
-        tests_io("Succeeded in creating the numpy dataset")
+
     assert min(list(history.history["loss"])) <= 1.3, \
         f"Failed in asserting minimum loss ({min(list(history.history['loss']))}) <= 1.3"
     assert max(list(history.history["auc"])) >= 0.8, \
@@ -110,7 +131,7 @@ def test_tf2_lstm(
 if __name__ == "__main__":
     disc_reader = dict()
     for i in range(10):
-        for task_name in ["DECOMP"]:
+        for task_name in ["LOS"]:
             # reader = datasets.load_data(chunksize=75836,
             #                             source_path=TEST_DATA_DEMO,
             #                             storage_path=SEMITEMP_DIR,
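The bining="custom" and one_hot=task_name == "LOS" arguments are what turn the continuous remaining length-of-stay target into the 10-class problem implied by output_dim = 10 and the categorical loss. A sketch under the assumption that the custom partition uses the standard benchmark buckets (one-day bins up to 8 days, one bin for 8 to 14 days, one for 14+); the helper name is hypothetical:

import numpy as np

def bin_los_custom(los_days: np.ndarray, one_hot: bool = True) -> np.ndarray:
    """Bucket remaining length-of-stay (in days) into the 10 benchmark classes."""
    # Classes 0-7: [0,1), [1,2), ..., [7,8) days; class 8: [8,14); class 9: 14+
    labels = np.digitize(los_days, [1, 2, 3, 4, 5, 6, 7, 8, 14])
    if not one_hot:
        return labels
    return np.eye(10)[labels]  # one-hot rows for the softmax target

For example, bin_los_custom(np.array([0.5, 3.2, 20.0]), one_hot=False) yields classes 0, 3 and 9.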
diff --git a/tests/tsettings.py b/tests/tsettings.py
index 1ad5333..1d194e4 100644
--- a/tests/tsettings.py
+++ b/tests/tsettings.py
@@ -11,6 +11,7 @@
     'TEST_GT_DIR', 'TASK_NAMES', 'FTASK_NAMES', 'TASK_NAME_MAPPING'
 ]
 
+# ------------------------- directory settings -------------------------
 TEST_SETTINGS = json.load(Path(os.getenv("TESTS"), "etc", "test.json").open())
 SEMITEMP_DIR = Path(os.getenv("WORKINGDIR"), "tests", "data", "semitemp")
 TEMP_DIR = Path(os.getenv("WORKINGDIR"), "tests", "data", "temp")
@@ -19,6 +20,7 @@
 TEST_DATA_DEMO = Path(TEST_DATA_DIR, "physionet.org", "files", "mimiciii-demo", "1.4")
 TEST_GT_DIR = Path(TEST_DATA_DIR, "generated-benchmark")
 
+# ------------------------- name settings -------------------------
 FTASK_NAMES = [
     "in-hospital-mortality", "decompensation", "length-of-stay", "phenotyping", "multitask"
 ]
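tsettings.py exports both the abbreviated TASK_NAMES and the full benchmark directory names in FTASK_NAMES, with TASK_NAME_MAPPING bridging the two. The mapping's contents are not visible in this hunk; a plausible shape, assumed rather than copied from the file, would be:

# Assumed shape of the mapping; the real definition lives in tests/tsettings.py
TASK_NAME_MAPPING = {
    "IHM": "in-hospital-mortality",
    "DECOMP": "decompensation",
    "LOS": "length-of-stay",
    "PHENO": "phenotyping",
    "MULTI": "multitask",  # the abbreviation for the multitask entry is a guess
}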