Skip to content

Commit

Permalink
Improved N3C Comparison report. Have made changes on old prod commit so
that DLs can use it now without waiting for changes on newer code to be
released.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sigfried committed Oct 14, 2024
1 parent a436add commit 1c62e95
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 66 deletions.
127 changes: 83 additions & 44 deletions backend/routes/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from functools import cache
from typing import Dict, List, Union, Set

import pandas as pd
from fastapi import APIRouter, Query, Request
from sqlalchemy import Connection, Row
from sqlalchemy.engine import RowMapping
Expand All @@ -18,7 +19,7 @@
from backend.db.refresh import refresh_db
from backend.db.queries import get_concepts
from backend.db.utils import get_db_connection, sql_query, SCHEMA, sql_query_single_col, sql_in, sql_in_safe, run_sql
from backend.utils import return_err_with_trace, commify
from backend.utils import return_err_with_trace, commify, recs2dicts, dicts2dict
from enclave_wrangler.config import RESEARCHER_COLS
from enclave_wrangler.models import convert_rows
from enclave_wrangler.objects_api import get_n3c_recommended_csets, get_concept_set_version_expression_items, \
Expand Down Expand Up @@ -617,68 +618,106 @@ def single_n3c_comparison_rpt(pair: str):

return rpt[0] if rpt else None

def get_possible_replacement_concepts(concept_id: int):
    """Look up concepts related to `concept_id` that could stand in for it.

    Queries `concept_relationship` for rows whose `concept_id_1` is the given
    concept and whose `relationship_id` is one of the mapping / replacement /
    equivalence relationships listed in the query, joined to
    `concepts_with_counts` for the details of the related concept
    (`concept_id_2`). Self-relationships are excluded.

    Args:
        concept_id: The concept to find possible replacements for.

    Returns:
        Whatever `sql_query` returns for the query: one record per related
        concept with `concept_id`, `concept_name`, `vocabulary_id`,
        `concept_class_id`, `standard_concept`, and `rels` (the sorted array of
        relationship ids linking the given concept to that related concept).
    """
    # Plain (non-f) string on purpose: the only variable part is the
    # :concept_id bind parameter, so nothing is ever interpolated into the SQL.
    q = """
        SELECT DISTINCT
        -- cr.concept_id_1,
        -- c1.concept_name AS concept_name_1,
        -- c1.vocabulary_id AS vocab_1,
        -- c1.concept_class_id AS class_1,
        -- c1.standard_concept AS sc_1,
        -- c1.invalid_reason AS invalid_1,
        c2.concept_id,
        c2.concept_name,
        c2.vocabulary_id,
        c2.concept_class_id,
        c2.standard_concept,
        public.ARRAY_SORT(ARRAY_AGG(relationship_id)) rels
        FROM concept_relationship cr
        -- JOIN concepts_with_counts c1 ON cr.concept_id_1 = c1.concept_id
        JOIN concepts_with_counts c2 ON cr.concept_id_2 = c2.concept_id
        WHERE concept_id_1 = :concept_id
        AND relationship_id in (
        'Maps to',
        'Mapped from',
        'Concept alt_to from',
        'Concept alt_to to',
        'Concept poss_eq from',
        'Concept poss_eq to',
        'Concept replaced by',
        'Concept replaces',
        'Concept same_as from',
        'Concept same_as to',
        'Concept was_a from',
        'Concept was_a to'
        )
        AND concept_id_1 != concept_id_2
        GROUP BY 1,2,3,4,5;
        """
    with get_db_connection() as con:
        return sql_query(con, q, {'concept_id': concept_id})

@cache
def get_comparison_rpt(con, codeset_id_1: int, codeset_id_2: int) -> Dict[str, Union[str, None]]:
def get_comparison_rpt(codeset_id_1: int, codeset_id_2: int) -> Dict[str, Union[str, None]]:
cset_1 = get_csets([codeset_id_1])[0]
cset_2 = get_csets([codeset_id_2])[0]

cset_1_only = sql_query(con, """
SELECT 'removed ' || concept_id || ' ' || concept_name AS diff FROM (
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :codeset_id_1
EXCEPT
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :cset_2_codeset_id
) x
""", {'codeset_id_1': codeset_id_1, 'cset_2_codeset_id': codeset_id_2})
cset_1_only = [dict(r)['diff'] for r in cset_1_only]

cset_2_only = sql_query(con, """
SELECT 'added ' || concept_id || ' ' || concept_name AS diff FROM (
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :cset_2_codeset_id
EXCEPT
SELECT concept_id, concept_name FROM concept_set_members WHERE codeset_id = :codeset_id_1
) x
""", {'codeset_id_1': codeset_id_1, 'cset_2_codeset_id': codeset_id_2})
cset_2_only = [dict(r)['diff'] for r in cset_2_only]

diffs = cset_1_only + cset_2_only

removed = sql_query_single_col(con, """
SELECT concept_id FROM concept_set_members WHERE codeset_id = :codeset_id_1
EXCEPT
SELECT concept_id FROM concept_set_members WHERE codeset_id = :cset_2_codeset_id
""", {'codeset_id_1': codeset_id_1, 'cset_2_codeset_id': codeset_id_2})

added = sql_query_single_col(con, """
SELECT concept_id FROM concept_set_members WHERE codeset_id = :cset_2_codeset_id
EXCEPT
SELECT concept_id FROM concept_set_members WHERE codeset_id = :codeset_id_1
""", {'codeset_id_1': codeset_id_1, 'cset_2_codeset_id': codeset_id_2})

flag_cnts_1 = ', flags: ' + ', '.join([f'{k}: {v}' for k, v in cset_1['flag_cnts'].items()]) if cset_1['flag_cnts'] else ''
flag_cnts_2 = ', flags: ' + ', '.join([f'{k}: {v}' for k, v in cset_2['flag_cnts'].items()]) if cset_2['flag_cnts'] else ''
csmi_1 = get_cset_members_items([codeset_id_1], ['concept_id', 'csm','item','flags',])
csmi_2 = get_cset_members_items([codeset_id_2], ['concept_id', 'csm','item','flags',])

removed_cids = set([c['concept_id'] for c in csmi_1]).difference([c['concept_id'] for c in csmi_2])
removed = [dict(csmi) for csmi in csmi_1 if csmi['concept_id'] in removed_cids]
removed_concepts = get_concepts(removed_cids)
for rec in removed:
c = [c for c in removed_concepts if c['concept_id'] == rec['concept_id']]
if len(c) == 1:
rec['name'] = c[0]['concept_name']
rec['voc'] = c[0]['vocabulary_id']
rec['cls'] = c[0]['concept_class_id']
rec['std'] = c[0]['standard_concept']
replacements = recs2dicts(get_possible_replacement_concepts(rec['concept_id']))
rec['replacements'] = replacements

added_cids = set([c['concept_id'] for c in csmi_2]).difference([c['concept_id'] for c in csmi_1])
added = [dict(csmi) for csmi in csmi_2 if csmi['concept_id'] in added_cids]
added_concepts = get_concepts(added_cids)
for rec in added:
c = [c for c in added_concepts if c['concept_id'] == rec['concept_id']]
if len(c) == 1:
rec['name'] = c[0]['concept_name']
rec['voc'] = c[0]['vocabulary_id']
rec['cls'] = c[0]['concept_class_id']
rec['std'] = c[0]['standard_concept']

flag_cnts_1 = 'flags: ' + ', '.join([f'{k}: {v}' for k, v in cset_1['flag_cnts'].items()]) if cset_1['flag_cnts'] else ''
flag_cnts_2 = 'flags: ' + ', '.join([f'{k}: {v}' for k, v in cset_2['flag_cnts'].items()]) if cset_2['flag_cnts'] else ''

# df = pd.DataFrame(replacements)

rpt = {
'name': cset_1['concept_set_name'],
'cset_1': f"{cset_1['codeset_id']} v{cset_1['version']}, "
f"vocab {cset_1['omop_vocab_version']}; "
f"{commify(cset_1['distinct_person_cnt'])} pts, "
f"{commify(cset_1['total_cnt'] or cset_1['total_cnt_from_term_usage'])} recs, "
f"{commify(cset_1['concepts'])} concepts{flag_cnts_1}",
f"{commify(cset_1['concepts'])} concepts, {flag_cnts_1}",
'cset_2': f"{cset_2['codeset_id']} v{cset_2['version']}, "
f"vocab {cset_2['omop_vocab_version']}; "
f"{commify(cset_2['distinct_person_cnt'])} pts, "
f"{commify(cset_2['total_cnt'] or cset_2['total_cnt_from_term_usage'])} recs, "
f"{commify(cset_2['concepts'])} concepts{flag_cnts_2}",
f"{commify(cset_2['concepts'])} concepts, {flag_cnts_2}",
'author': cset_1['codeset_creator'],
'cset_1_codeset_id': codeset_id_1,
'codeset_id_1': codeset_id_1,
# 'cset_1_version': cset_1['version'],
'cset_2_codeset_id': codeset_id_2,
'codeset_id_2': codeset_id_2,
'added': added,
'removed': removed,
# 'cset_2_version': cset_2['version'],
# 'cset_1_only': cset_1_only,
# 'cset_2_only': cset_2_only,
'diffs': diffs, # remove once front end working with new added/removed data
}
return rpt

Expand All @@ -691,15 +730,15 @@ def generate_n3c_comparison_rpt():
"""
SELECT orig_codeset_id, new_codeset_id
FROM public.codeset_comparison
--WHERE rpt IS NULL
WHERE rpt IS NULL
""")
i = 1
for pair in pairs:
pair = list(dict(pair).values())
print(f"Processing {str(pair)} {i} of {len(pairs)}")
i += 1

rpt = get_comparison_rpt(con, *pair)
rpt = get_comparison_rpt(*pair)

run_sql(con, """
UPDATE public.codeset_comparison
Expand Down
17 changes: 17 additions & 0 deletions backend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,23 @@ def cnt(vals):
return len(set(vals))


def recs2dicts(recs):
    """Convert each mapping-like record (e.g. a RowMapping) into a plain dict.

    Returns a new list holding one dict per record, in the original order.
    """
    return list(map(dict, recs))


def dicts2dict(dicts, key):
    """Index a collection of dicts by the value each one holds under `key`.

    Args:
        dicts: Iterable of dict-like records (a list, generator, etc.); every
            record must be subscriptable by `key`.
        key: Name of the property whose value becomes the key in the result.

    Returns:
        A dict mapping `record[key]` to the record itself.

    Raises:
        ValueError: If two records share the same value for `key`.
    """
    indexed = {}
    for rec in dicts:
        k = rec[key]
        # Fail on the first collision so the message can name the offending
        # value; this also avoids needing len(), so any iterable is accepted.
        if k in indexed:
            raise ValueError(
                f"Duplicate keys: {k!r} occurs more than once for {key!r}")
        indexed[k] = rec
    return indexed


def dump(o):
    """Serialize `o` to a JSON string, pretty-printed with two-space indents."""
    pretty = json.dumps(o, indent=2)
    return pretty
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/components/CsetComparisonPage.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ export async function fetchGraphData(props) {

comparison_rpt = await comparison_rpt;
if (comparison_rpt) {
specialConcepts.added = comparison_rpt.added.map(String);
specialConcepts.removed = comparison_rpt.removed.map(String);
specialConcepts.added = comparison_rpt.added.map(d => d.concept_id + '');
specialConcepts.removed = comparison_rpt.removed.map(d => d.concept_id + '');
}

for (let cid in conceptLookup) {
Expand Down
51 changes: 31 additions & 20 deletions frontend/src/components/N3CRecommended.jsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import React, { useState, useEffect, } from "react";
import DataTable, { createTheme } from "react-data-table-component";
import { flatten, uniq, sortBy } from "lodash";
import { flatten, uniq, sortBy, isEmpty } from "lodash";

import {backend_url, useDataGetter} from "../state/DataGetter";
import {useSearchParamsState} from "../state/SearchParamsProvider";
Expand Down Expand Up @@ -93,7 +93,7 @@ export const N3CComparisonRpt = () => {
}
try {
const rows = await dataGetter.fetchAndCacheItems(dataGetter.apiCalls.n3c_comparison_rpt);
let concept_ids = uniq(flatten(rows.map(row => [...(row.added), ...(row.removed)])));
let concept_ids = uniq(flatten(rows.map(row => [...(row.added), ...(row.removed)])).map(d => d.concept_id));
const concepts = await dataGetter.fetchAndCacheItems(dataGetter.apiCalls.concepts, concept_ids);
setData({rows, concepts});
} catch (error) {
Expand All @@ -106,32 +106,44 @@ export const N3CComparisonRpt = () => {
return <div>Loading...</div>;
}
let {rows, concepts} = data
function tbl(concept_ids) {
let tblConcepts = concept_ids.map(d => concepts[d]);
function tbl(tblConcepts) {
return (
<table id="n3ccompdiff"><tbody>{
sortBy(tblConcepts, ['standard_concept', 'concept_name']).map((c,i) => {
const pr = isEmpty(c.replacements) ? null : (
<td>Poss replacements: {
c.replacements.map(r => (
`${r.rels.join(
',')}: ${r.concept_id} ${r.concept_name} ${r.standard_concept} ${r.vocabulary_id} ${r.concept_class_id}`
)).join(', ')
}</td>);
return (
<tr key={i}>
<td>{c.concept_id}</td>
<td><i>{c.standard_concept === 'S' ? 'Standard' : c.standard_concept === 'C' ? 'Classification' : 'Non-standard'}</i></td>
<td>{c.concept_name}</td>
</tr>)
<tr key={i}>
<td>{c.concept_id}</td>
<td>{c.name}</td>
<td><i>{c.std === 'S' ? 'Standard' : c.std === 'C'
? 'Classification'
: 'Non-standard'}</i></td>
<td>{c.voc}</td>
<td>{c.cls}</td>
{pr}
</tr>);
})
}</tbody></table>
}</tbody>
</table>
)
}

function DiffList({data: row}) {
console.log({row});
const removed = isEmpty(row.removed) ? null : <span><b>Removed:</b>{tbl(
row.removed)}</span>;
const added = isEmpty(row.added) ? null : <span><b>Added:</b>{tbl(
row.added)}</span>;
return (
<div style={{margin: 10,}}>
<p>
<b>Removed:</b>{tbl(row.removed)}
</p>
<p>
<b>Added:</b>{tbl(row.added)}
</p>
{removed}
{added}
</div>
);
}
Expand All @@ -144,13 +156,12 @@ export const N3CComparisonRpt = () => {
{grow: 3, sortable: true, name: "New", selector: row => row.cset_2, wrap: true},
{grow: 2, name: "Compare", selector: row => (
<Button
to={`/cset-comparison?codeset_ids=${row.cset_1_codeset_id}&codeset_ids=${row.cset_2_codeset_id}` +
`&comparison_rpt=${row.cset_1_codeset_id}-${row.cset_2_codeset_id}`}
to={`/cset-comparison?codeset_ids=${row.codeset_id_1}&codeset_ids=${row.codeset_id_2}` +
`&comparison_rpt=${row.codeset_id_1}-${row.codeset_id_2}`}
component={Link}
style={{margin: '7px', textTransform: 'none'}}
>
{row.diffs.filter(d => d.startsWith('removed')).length} removed
, {row.diffs.filter(d => d.startsWith('added')).length} added
{row.removed.length} removed, {row.added.length} added
{/*orig {String.fromCodePoint(0x2192)} new*/}
</Button>

Expand Down

0 comments on commit 1c62e95

Please sign in to comment.