Removed vocab study #293

Merged · 3 commits · Sep 4, 2024
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
@@ -99,8 +99,8 @@ jobs:
WG: cumulus
DB: cumulus_library_regression_db
run: |
cumulus-library build -t vocab -t core --profile $PROFILE --workgroup $WG --database $DB
cumulus-library export -t vocab -t core ./tests/regression/data_export/ --profile $PROFILE --workgroup $WG --database $DB
cumulus-library build -t core --profile $PROFILE --workgroup $WG --database $DB
cumulus-library export -t core ./tests/regression/data_export/ --profile $PROFILE --workgroup $WG --database $DB
- name: Compare vs known data
run: python ./tests/regression/run_regression.py
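
For a local run of the same regression step, here is a rough Python equivalent of the shell commands above, using the library's `cli.main` entry point the way the tests do. The profile, workgroup, and database values are placeholders for a real Athena setup, not values taken from this PR:

```python
from cumulus_library import cli

# Placeholder Athena settings, mirroring the workflow's PROFILE/WG/DB env values.
PROFILE, WG, DB = "my-athena-profile", "cumulus", "cumulus_library_regression_db"

for args in (
    ["build", "-t", "core", "--profile", PROFILE, "--workgroup", WG, "--database", DB],
    ["export", "-t", "core", "./tests/regression/data_export/",
     "--profile", PROFILE, "--workgroup", WG, "--database", DB],
):
    cli.main(cli_args=args)
```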

1 change: 1 addition & 0 deletions .gitignore
@@ -10,6 +10,7 @@ output.sql
MRCONSO.RRF
*.zip
coverage.xml
*.parquet

# Byte-compiled / optimized / DLL files
__pycache__/
6 changes: 0 additions & 6 deletions cumulus_library/studies/vocab/README.MD

This file was deleted.

110,149 changes: 0 additions & 110,149 deletions cumulus_library/studies/vocab/icd/ICD10CM_2023AA.bsv

This file was deleted.

270,675 changes: 0 additions & 270,675 deletions cumulus_library/studies/vocab/icd/ICD10PCS_2023AA.bsv

This file was deleted.

22,406 changes: 0 additions & 22,406 deletions cumulus_library/studies/vocab/icd/ICD9CM_2023AA.bsv

This file was deleted.

17 changes: 0 additions & 17 deletions cumulus_library/studies/vocab/manifest.toml

This file was deleted.

18 changes: 0 additions & 18 deletions cumulus_library/studies/vocab/reference_sql/vocab_icd_builder.sql

This file was deleted.

2 changes: 0 additions & 2 deletions tests/conftest.py
@@ -35,8 +35,6 @@
"observation": [["id"], ["encounter", "reference"]],
"patient": [["id"]],
}
VOCAB_ICD_ROW_COUNT = 403230

# Utility functions


14 changes: 0 additions & 14 deletions tests/regression/run_regression.py
@@ -85,19 +85,5 @@ def regress_core():
print("✅ Core study reference and export matched ✅")


def regress_vocab():
export_path = f"{Path(__file__).resolve().parent}/data_export/vocab"
with open(f"{export_path}/vocab__icd.csv") as f:
export_size = len(f.readlines())
# this is the value of
if export_size != VOCAB_ICD_ROW_COUNT:
sys.exit(
f"❌ Vocab tables built from parquets are not expected length."
f" Found rows: {export_size} ❌"
)
print("✅ Vocab tables built from parquets are expected length ✅")


if __name__ == "__main__":
regress_vocab()
regress_core()
1 change: 0 additions & 1 deletion tests/test_cli.py
@@ -316,7 +316,6 @@ def test_clean(tmp_path, args, expected, raises):
3,
does_not_raise(),
),
(["build", "-t", "vocab"], None, 3, does_not_raise()),
(
# checking that a study is loaded from the directory of a user-defined
# path. we're also validating that the CLI accepts the statistics keyword
3 changes: 3 additions & 0 deletions tests/test_data/study_static_file/bsvs/ICD10CM_2023AA.bsv
@@ -0,0 +1,3 @@
C0000727|ICD10CM|PT|R10.0|Acute abdomen
C0000737|ICD10CM|PT|R10.9|Unspecified abdominal pain
C0000744|ICD10CM|ET|E78.6|Abetalipoproteinemia
3 changes: 3 additions & 0 deletions tests/test_data/study_static_file/bsvs/ICD10PCS_2023AA.bsv
@@ -0,0 +1,3 @@
C0005491|ICD10PCS|PT|GZC9ZZZ|Biofeedback
C0005491|ICD10PCS|PX|GZC9ZZZ|Mental Health @ None @ Biofeedback @ Other Biofeedback @ None @ None @ None
C0010332|ICD10PCS|PT|GZ2ZZZZ|Crisis Intervention
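
For context, each of the two fixture files above contributes three rows, which is where the `len(table_rows) == 6` assertion in the updated test comes from. A minimal sketch of reading them the same way the builder does (paths assume the repo root as the working directory):

```python
import pandas

# Same column names and delimiter the builder passes to read_csv.
headers = ["CUI", "TTY", "CODE", "SAB", "STR"]
fixtures = [
    "tests/test_data/study_static_file/bsvs/ICD10CM_2023AA.bsv",
    "tests/test_data/study_static_file/bsvs/ICD10PCS_2023AA.bsv",
]

combined = pandas.concat(
    pandas.read_csv(path, delimiter="|", names=headers) for path in fixtures
)
assert len(combined) == 6  # three rows from each fixture file
```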
11 changes: 11 additions & 0 deletions tests/test_data/study_static_file/manifest.toml
@@ -0,0 +1,11 @@
study_prefix = "study_static_file"

[table_builder_config]
file_names = [
"static_file_builder.py",
]

[export_config]
export_list = [
"study_static_file__table"
]
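
A minimal sketch of what this manifest declares, read with the standard library's `tomllib` (the real manifest parsing lives inside cumulus-library; only the field names shown above are relied on here). The diff immediately below is the `static_file_builder.py` this manifest points at, with its class renamed from `VocabIcdRunner` to `StaticFileBuilder`:

```python
import tomllib  # Python 3.11+

with open("tests/test_data/study_static_file/manifest.toml", "rb") as f:
    manifest = tomllib.load(f)

assert manifest["study_prefix"] == "study_static_file"
assert manifest["table_builder_config"]["file_names"] == ["static_file_builder.py"]
assert manifest["export_config"]["export_list"] == ["study_static_file__table"]
```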
@@ -8,7 +8,7 @@
from cumulus_library.template_sql import base_templates


class VocabIcdRunner(cumulus_library.BaseTableBuilder):
class StaticFileBuilder(cumulus_library.BaseTableBuilder):
display_text = "Creating ICD vocab..."

def prepare_queries(
@@ -26,19 +26,19 @@ def prepare_queries(
:param schema: the schema/db name, matching the cursor
"""

table_name = "vocab__icd"
table_name = "study_static_file__table"
path = pathlib.Path(__file__).parent
icd_files = path.glob("icd/*.bsv")
icd_files = path.glob("bsvs/*.bsv")
headers = ["CUI", "TTY", "CODE", "SAB", "STR"]
header_types = ["STRING", "STRING", "STRING", "STRING", "STRING"]
for file in icd_files:
parquet_path = path / f"icd/{file.stem}.parquet"
parquet_path = path / f"bsvs/{file.stem}.parquet"
df = pandas.read_csv(file, delimiter="|", names=headers)
df.to_parquet(parquet_path)
remote_path = config.db.upload_file(
file=parquet_path,
study="vocab",
topic="icd",
study="study_static_file",
topic="static_file",
remote_filename=f"{file.stem}.parquet",
force_upload=config.force_upload,
)
@@ -48,7 +48,7 @@ def prepare_queries(
base_templates.get_ctas_from_parquet_query(
schema_name=config.schema,
table_name=table_name,
local_location=path / "icd",
local_location=path / "bsvs",
remote_location=remote_path,
table_cols=headers,
remote_table_cols_types=header_types,
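
The builder's core flow is unchanged by the rename: each `.bsv` is read into a DataFrame, written out as parquet beside the source file, uploaded, and then referenced by a CREATE TABLE AS query from `base_templates.get_ctas_from_parquet_query`. A standalone sketch of just the local BSV-to-parquet step (the upload and query-generation calls are cumulus-library internals and are left out):

```python
import pathlib

import pandas

headers = ["CUI", "TTY", "CODE", "SAB", "STR"]
bsv_dir = pathlib.Path("tests/test_data/study_static_file/bsvs")

for bsv_file in sorted(bsv_dir.glob("*.bsv")):
    # Mirrors the read/write pattern in prepare_queries()
    df = pandas.read_csv(bsv_file, delimiter="|", names=headers)
    parquet_path = bsv_dir / f"{bsv_file.stem}.parquet"
    df.to_parquet(parquet_path)
    print(f"{parquet_path.name}: {len(df)} rows")
```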
24 changes: 5 additions & 19 deletions tests/test_vocab.py → tests/test_static_file.py
@@ -9,27 +9,13 @@
os.environ,
clear=True,
)
def test_vocab(tmp_path):
def test_static_file(tmp_path):
cli.main(
cli_args=conftest.duckdb_args(
[
"build",
"-t",
"core",
"-s",
"./tests/test_data",
"--database",
"test",
],
tmp_path,
)
)
cli.main(
cli_args=conftest.duckdb_args(
[
"build",
"-t",
"vocab",
"study_static_file",
"-s",
"./tests/test_data",
"--database",
@@ -40,10 +40,10 @@ def test_static_file(tmp_path):
)
db = databases.DuckDatabaseBackend(f"{tmp_path}/duck.db")
cursor = db.cursor()
table_rows, cols = conftest.get_sorted_table_data(cursor, "vocab__icd")
table_rows, cols = conftest.get_sorted_table_data(cursor, "study_static_file__table")
expected_cols = {"CUI", "TTY", "CODE", "SAB", "STR"}
found_cols = {col_schema[0] for col_schema in cols}
assert expected_cols == found_cols
assert len(table_rows) == conftest.VOCAB_ICD_ROW_COUNT
assert len(table_rows) == 6
assert table_rows[0] == ("C0000727", "ICD10CM", "PT", "R10.0", "Acute abdomen")
assert table_rows[-1] == ("C5700317", "ICD10CM", "HT", "M91.3", "Pseudocoxalgia")
assert table_rows[-1] == ("C0010332", "ICD10PCS", "PT", "GZ2ZZZZ", "Crisis Intervention")
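
To poke at the build output outside the test, a minimal sketch that opens the DuckDB file the build writes and checks the same table; the `duck.db` path is illustrative, since the test puts the database under pytest's `tmp_path`:

```python
import duckdb

# Illustrative path to a database produced by a build of study_static_file.
con = duckdb.connect("duck.db")
rows = con.execute("SELECT * FROM study_static_file__table").fetchall()
print(len(rows))  # expected: 6 (three rows from each fixture .bsv)
print(rows[0])    # e.g. ('C0000727', 'ICD10CM', 'PT', 'R10.0', 'Acute abdomen')
```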