Skip to content

Commit

Permalink
data from sirano
Browse files Browse the repository at this point in the history
  • Loading branch information
ericjeangirard committed Feb 13, 2024
1 parent 9c60deb commit 0923df8
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
9 changes: 6 additions & 3 deletions bsoclinicaltrials/server/main/enrich_ct.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from bsoclinicaltrials.server.main.strings import normalize
from bsoclinicaltrials.server.main.utils import chunks, get_dois_info

from bsoclinicaltrials.server.main.sirano import get_sirano

def tag_sponsor(x):
x_normalized = normalize(x)
Expand All @@ -16,8 +16,9 @@ def tag_sponsor(x):
def enrich(all_ct):
res = []
dois_to_get = []
sirano_dict = get_sirano()
for ct in all_ct:
enriched = enrich_ct(ct)
enriched = enrich_ct(ct, sirano_dict)
references = enriched.get('references', [])
for r in references:
if r.get('doi') and r.get('ReferenceType') in ['result', 'derived']:
Expand Down Expand Up @@ -76,7 +77,7 @@ def enrich(all_ct):
return res


def enrich_ct(ct):
def enrich_ct(ct, sirano_dict):
ct['study_start_year'] = None
if isinstance(ct.get('study_start_date'), str):
ct['study_start_year'] = int(ct['study_start_date'][0:4])
Expand Down Expand Up @@ -125,4 +126,6 @@ def enrich_ct(ct):
status_simplified = 'Ongoing'
ct['status_simplified'] = status_simplified
ct['bso_country'] = ['fr']
if isinstance(ct.get('NCTId'), str) and ct['NCTId'] in sirano_dict:
ct.update(sirano_dict[ct['NCTId']])
return ct
20 changes: 20 additions & 0 deletions bsoclinicaltrials/server/main/sirano.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pandas as pd
from bsoclinicaltrials.server.main.logger import get_logger

# Module-level logger, named after this module.
logger = get_logger(__name__)

# data.gouv.fr resource URL that get_sirano() reads with pandas as a
# semicolon-separated, ISO-8859-1 encoded CSV.
url = "https://www.data.gouv.fr/fr/datasets/r/c156ce7f-1ec8-4381-b8b9-fe5d2f933168"

def get_sirano():
    """Download the SIRANo funding dataset and index it by trial registry number.

    Reads the semicolon-separated, ISO-8859-1 encoded CSV located at the
    module-level ``url`` and groups its rows by the value of the
    ``numero_registre_essais`` column. Rows whose registry number is not a
    string (for instance an empty cell, which pandas reads as NaN) are ignored.

    Returns:
        dict: maps each registry number to a dict with two keys:
            ``financements`` -- list of the matching CSV rows, each as a dict;
            ``financement_total`` -- sum of the numeric, non-missing
            ``financement_total`` values of those rows (``0.0`` if none).

    Any error raised by ``pandas.read_csv`` (unreachable URL, malformed file)
    is not caught here and propagates to the caller.
    """
    df = pd.read_csv(url, sep=';', encoding='iso-8859-1')
    sirano_dict = {}
    for _, row in df.iterrows():
        nct = row.numero_registre_essais
        if not isinstance(nct, str):
            continue
        entry = sirano_dict.setdefault(
            nct, {'financement_total': 0.0, 'financements': []}
        )
        entry['financements'].append(row.to_dict())
        amount = row.financement_total
        # An empty amount cell is read as float NaN: it passes the isinstance
        # check but adding it would turn the whole total into NaN, so skip it.
        if isinstance(amount, (int, float)) and pd.notna(amount):
            entry['financement_total'] += amount
    logger.debug(f'{len(sirano_dict)} essais dans SIRANo')
    return sirano_dict
2 changes: 2 additions & 0 deletions bsoclinicaltrials/server/main/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from bsoclinicaltrials.server.main.enrich_ct import enrich
from bsoclinicaltrials.server.main.euctr import harvest_parse_euctr
from bsoclinicaltrials.server.main.merge_sources import merge_all
from bsoclinicaltrials.server.main.logger import get_logger

logger = get_logger(__name__)

def create_task_harvest(args: dict) -> dict:
source = args.get('source', '').lower()
Expand Down

0 comments on commit 0923df8

Please sign in to comment.