Skip to content

Commit

Permalink
Merge pull request #139 from CanDIG/daisieh/cohort-program
Browse files Browse the repository at this point in the history
DIG-1522: change cohort to program
  • Loading branch information
daisieh authored Nov 25, 2024
2 parents 35e7ce6 + d4a00d1 commit 0b28756
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 43 deletions.
23 changes: 17 additions & 6 deletions daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,36 @@ def ingest_file(file_path):
json_data = None
results = {}
results_path = os.path.join(DAEMON_PATH, "results", os.path.basename(file_path))
- with open(file_path) as f:
- json_data = json.load(f)
+ try:
+ with open(file_path) as f:
+ json_data = json.load(f)
+ except Exception as e:
+ message = f"Couldn't load data from {file_path}: {type(e)} {str(e)}"
+ logger.error(message)
+ results["error"] = message
if json_data is not None:
logger.info(f"Ingesting {file_path}")
if "katsu" in json_data:
json_data = json_data["katsu"]
programs = list(json_data.keys())
for program_id in programs:
- ingest_results, status_code = ingest_schemas(json_data[program_id]["schemas"])
- results[program_id] = ingest_results
+ try:
+ ingest_results, status_code = ingest_schemas(json_data[program_id]["schemas"])
+ results[program_id] = ingest_results
+ except Exception as e:
+ results[program_id] = f"Exception: {type(e)} {str(e)}"
elif "htsget" in json_data:
do_not_index = False
if "do_not_index" in json_data:
do_not_index = json_data["do_not_index"]
json_data = json_data["htsget"]
programs = list(json_data.keys())
for program_id in programs:
- ingest_results, status_code = htsget_ingest(json_data[program_id], do_not_index)
- results[program_id] = ingest_results
+ try:
+ ingest_results, status_code = htsget_ingest(json_data[program_id], do_not_index)
+ results[program_id] = ingest_results
+ except Exception as e:
+ results[program_id] = f"Exception: {type(e)} {str(e)}"
with open(results_path, "w") as f:
json.dump(results, f)
os.remove(file_path)
Expand Down
21 changes: 10 additions & 11 deletions htsget_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import os
import re
import json
- from ingest_result import IngestServerException, IngestUserException, IngestResult
import requests
import sys
from urllib.parse import urlparse
Expand Down Expand Up @@ -47,7 +46,7 @@ def link_genomic_data(sample, do_not_index=False):
genomic_drs_obj["id"] = sample["genomic_file_id"]
genomic_drs_obj["name"] = sample["genomic_file_id"]
genomic_drs_obj["description"] = sample["metadata"]["sequence_type"]
- genomic_drs_obj["cohort"] = sample["program_id"]
+ genomic_drs_obj["program"] = sample["program_id"]
genomic_drs_obj["reference_genome"] = sample["metadata"]["reference"]
genomic_drs_obj["version"] = "v1"
if "contents" not in genomic_drs_obj:
Expand All @@ -71,7 +70,7 @@ def link_genomic_data(sample, do_not_index=False):
"id": clin_sample["submitter_sample_id"],
"name": clin_sample["submitter_sample_id"],
"description": "sample",
- "cohort": sample["program_id"],
+ "program": sample["program_id"],
"version": "v1",
"contents": []
}
Expand Down Expand Up @@ -149,7 +148,7 @@ def add_file_drs_object(genomic_drs_obj, file, type, headers):
"id": file['name'],
"name": file['name'],
"description": type,
- "cohort": genomic_drs_obj["cohort"],
+ "program": genomic_drs_obj["program"],
"version": "v1"
}
access_method = get_access_method(file["access_method"])
Expand Down Expand Up @@ -264,7 +263,7 @@ def htsget_ingest(ingest_json, do_not_index=False):
statistics = {}
for program_id in program_ids:
url = f"{HTSGET_URL}/htsget/v1/samples"
- response = requests.get(url, headers=headers, params={"cohort": program_id})
+ response = requests.get(url, headers=headers, params={"program": program_id})
if response.status_code == 200:
for sample in response.json():
if program_id not in statistics:
Expand All @@ -279,13 +278,13 @@ def htsget_ingest(ingest_json, do_not_index=False):
result["errors"] = f"Could not collect completeness stats for program: {response.text}"

for program_id in statistics:
- # get the cohort
- url = f"{HTSGET_URL}/ga4gh/drs/v1/cohorts"
+ # get the program
+ url = f"{HTSGET_URL}/ga4gh/drs/v1/programs"
response = requests.get(f"{url}/{program_id}", headers=headers)
if response.status_code == 200:
- cohort = response.json()
- cohort["statistics"] = statistics[program_id]
- response = requests.post(url, headers=headers, json=cohort)
+ program = response.json()
+ program["statistics"] = statistics[program_id]
+ response = requests.post(url, headers=headers, json=program)
if response.status_code != 200:
result["errors"] = f"Could not add statistics for program: {response.text}"
else:
Expand Down Expand Up @@ -355,7 +354,7 @@ def check_genomic_data(dataset, token):

def delete_program(program_id, token):
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
- url = f"{HTSGET_URL}/ga4gh/drs/v1/cohorts/{program_id}"
+ url = f"{HTSGET_URL}/ga4gh/drs/v1/programs/{program_id}"

return requests.delete(url, headers=headers)

Expand Down
5 changes: 2 additions & 3 deletions ingest_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import urllib.parse

import auth
- from ingest_result import *
import katsu_ingest
import htsget_ingest
from opa_ingest import remove_user_from_dataset, add_user_to_dataset
Expand All @@ -22,7 +21,7 @@
"SUCCESS": 0,
"UNAUTHORIZED": 1,
"VALIDATION": 2,
- "COHORTEXISTS": 3,
+ "PROGRAMEXISTS": 3,
"INTERNAL": 4,
"AUTHORIZATIONERR": 5
}
Expand All @@ -32,7 +31,7 @@ def generateResponse(result, response_code):
0: ("Success", 200),
1: ("Unauthorized", 403),
2: ("Validation error", 422),
- 3: ("Cohort exists", 422),
+ 3: ("Program exists", 422),
4: ("Internal CanDIG error", 500),
5: ("Authorization error", 401)
}
Expand Down
23 changes: 0 additions & 23 deletions ingest_result.py

This file was deleted.

0 comments on commit 0b28756

Please sign in to comment.