Skip to content

Commit

Permalink
feat: use one shared creation time across all indices; add epoch timestamp field
Browse files Browse the repository at this point in the history
  • Loading branch information
simon-20 committed Jan 23, 2025
1 parent 1aa48ce commit cb0d2ba
Showing 1 changed file with 18 additions and 7 deletions.
25 changes: 18 additions & 7 deletions src/bulk_data_service/dataset_indexing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import uuid
from datetime import datetime
from typing import Any

from azure.storage.blob import BlobServiceClient
Expand All @@ -11,14 +12,17 @@
def create_and_upload_indices(
context: dict, datasets_in_bds: dict[uuid.UUID, dict], reporting_orgs_in_bds: dict[uuid.UUID, dict]
):
index_creation_time = get_timestamp()

context["logger"].info("Creating indices")

dataset_index_minimal = create_dataset_index_json(context, datasets_in_bds, reporting_orgs_in_bds, "minimal")
dataset_index_minimal = create_dataset_index_json(context, index_creation_time, datasets_in_bds, "minimal")

dataset_index_full = create_dataset_index_json(context, datasets_in_bds, reporting_orgs_in_bds, "full")
dataset_index_full = create_dataset_index_json(context, index_creation_time, datasets_in_bds, "full")

reporting_org_index_full = create_reporting_org_index_json(context, datasets_in_bds, reporting_orgs_in_bds)
reporting_org_index_full = create_reporting_org_index_json(
context, index_creation_time, datasets_in_bds, reporting_orgs_in_bds
)

upload_index_json_to_azure(context, get_dataset_index_name(context, "minimal"), dataset_index_minimal)

Expand Down Expand Up @@ -46,29 +50,36 @@ def upload_index_json_to_azure(context: dict, index_name: str, index_json: str):

def create_dataset_index_json(
    context: dict,
    created_time: datetime,
    datasets_in_bds: dict[uuid.UUID, dict],
    index_type: str,
) -> str:
    """Serialise the dataset index as pretty-printed JSON.

    ``index_type`` selects the variant ("minimal" or "full") passed through to
    ``get_dataset_index``. A single ``created_time`` is supplied by the caller
    so every index produced in the same run carries identical timestamps.
    """
    index_document = create_index_time_entries(created_time)
    index_document["datasets"] = get_dataset_index(context, datasets_in_bds, index_type)
    # default=str stringifies the datetime (and any other non-JSON-native values)
    return json.dumps(index_document, default=str, sort_keys=True, indent=True)


def create_reporting_org_index_json(
    context: dict,
    created_time: datetime,
    datasets_in_bds: dict[uuid.UUID, dict],
    reporting_orgs_in_bds: dict[uuid.UUID, dict],
) -> str:
    """Serialise the reporting-organisation index as pretty-printed JSON.

    A single ``created_time`` is supplied by the caller so this index and the
    dataset indices generated in the same run share identical timestamps.
    """
    index_document = create_index_time_entries(created_time)
    index_document["reporting_orgs"] = get_reporting_orgs_for_datasets(
        context, datasets_in_bds, reporting_orgs_in_bds
    )
    # default=str stringifies the datetime (and any other non-JSON-native values)
    return json.dumps(index_document, default=str, sort_keys=True, indent=True)


def create_index_time_entries(created_time: datetime) -> dict[str, Any]:
    """Return the shared timestamp fields embedded in every index document.

    ``index_created`` holds the datetime itself (stringified later by the
    JSON serialiser); ``index_created_epoch`` is the same instant truncated
    to whole Unix seconds.
    """
    epoch_seconds = int(created_time.timestamp())
    return {
        "index_created": created_time,
        "index_created_epoch": epoch_seconds,
    }


def get_reporting_orgs_for_datasets(
context: dict, datasets_in_bds: dict[uuid.UUID, dict], reporting_orgs_in_bds: dict[uuid.UUID, dict]
) -> list:
Expand Down

0 comments on commit cb0d2ba

Please sign in to comment.