diff --git a/scripts/migrations/MAINTAINER.md b/scripts/migrations/MAINTAINER.md new file mode 100644 index 0000000..80661fb --- /dev/null +++ b/scripts/migrations/MAINTAINER.md @@ -0,0 +1,4 @@ +As of this writing, migration #3 is safe to run any time you need +to regenerate the table column type data/data package cache (i.e. +if things are deleted, or in response to bugs in this process). +You should run the appropriate crawler first. \ No newline at end of file diff --git a/scripts/migrations/migration.003.data_packages.py b/scripts/migrations/migration.003.data_packages.py index 8ea6efd..791d8cc 100644 --- a/scripts/migrations/migration.003.data_packages.py +++ b/scripts/migrations/migration.003.data_packages.py @@ -108,8 +108,6 @@ def get_s3_json_as_dict(bucket, key: str): """reads a json object as dict (typically metadata in this case)""" s3_client = boto3.client("s3") bytes_buffer = io.BytesIO() - print(bucket) - print(key) s3_client.download_fileobj( Bucket=bucket, Key=key, @@ -136,23 +134,24 @@ def cache_api_data(s3_bucket_name: str, db: str) -> None: ) dp_details = [] for dp in list(data_packages): - dp_detail = { - "study": dp.split("__", 1)[0], - "name": dp.split("__", 1)[1], - } try: - versions = column_types[dp_detail["study"]][dp_detail["name"]] - for version in versions: - dp_details.append( - { - **dp_detail, - **versions[version], - "version": version, - "id": dp + "__" + version, - } - ) - except KeyError: + study, name, version = dp.split("__") + except ValueError: + print("invalid name: ", dp) continue + try: + matching_col_types = column_types[study][name] + dp_details.append( + { + "study": study, + "name": name, + "version": version, + **matching_col_types[dp], + "id": dp, + } + ) + except KeyError as e: + print("invalid key: ", e) s3_client.put_object( Bucket=s3_bucket_name, Key=f"{BucketPath.CACHE.value}/{JsonFilename.DATA_PACKAGES.value}.json", diff --git a/src/handlers/dashboard/get_csv.py b/src/handlers/dashboard/get_csv.py index a9099db..47fd19e 100644 --- a/src/handlers/dashboard/get_csv.py +++ b/src/handlers/dashboard/get_csv.py @@ -82,7 +82,7 @@ def get_csv_list_handler(event, context): del context s3_bucket_name = os.environ.get("BUCKET_NAME") s3_client = boto3.client("s3") - if event["path"].startswith("/last_valid"): + if event["path"].startswith("/last-valid"): key_prefix = "last_valid" url_prefix = "last_valid" elif event["path"].startswith("/aggregates"): diff --git a/tests/dashboard/test_get_csv.py b/tests/dashboard/test_get_csv.py index 65afd61..33eea44 100644 --- a/tests/dashboard/test_get_csv.py +++ b/tests/dashboard/test_get_csv.py @@ -125,7 +125,7 @@ def test_get_csv(mock_bucket, params, status, expected): does_not_raise(), ), ( - "/last_valid", + "/last-valid", 200, [ "last_valid/study/encounter/princeton_plainsboro_teaching_hospital/099/study__encounter__aggregate.csv" @@ -138,7 +138,7 @@ def test_get_csv(mock_bucket, params, status, expected): @mock.patch.dict(os.environ, mock_utils.MOCK_ENV) def test_get_csv_list(mock_bucket, path, status, expected, raises): with raises: - if path.startswith("/last_valid"): + if path.startswith("/last-valid"): _mock_last_valid() event = {"path": path} res = get_csv.get_csv_list_handler(event, {})