-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Vocab refresh: Update networkx
graphs
#883
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,20 @@ | ||
"""Graph related functions and routes""" | ||
import os, warnings | ||
# import csv | ||
# import io | ||
# import json | ||
import dateutil.parser as dp | ||
from datetime import datetime | ||
from pathlib import Path | ||
from typing import Any, Iterable, List, Set, Tuple, Union, Dict, Optional | ||
|
||
import pickle | ||
import networkx as nx | ||
# import pydot | ||
from fastapi import APIRouter, Query, Request | ||
from networkx import DiGraph | ||
from sqlalchemy import Row, RowMapping | ||
from sqlalchemy.sql import text | ||
|
||
# from fastapi.responses import JSONResponse | ||
# from fastapi.responses import Response | ||
# from fastapi.encoders import jsonable_encoder | ||
# from collections import OrderedDict | ||
# from igraph import Graph | ||
# from networkx.drawing.nx_pydot import to_pydot, from_pydot | ||
|
||
from backend.routes.db import get_cset_members_items | ||
from backend.db.queries import get_concepts | ||
from backend.db.utils import get_db_connection, SCHEMA | ||
from backend.db.utils import check_db_status_var, get_db_connection, SCHEMA | ||
from backend.api_logger import Api_logger | ||
from backend.utils import get_timer, commify | ||
|
||
|
@@ -45,17 +36,15 @@ async def concept_graph_get( | |
) -> Dict[str, Any]: | ||
"""Return concept graph""" | ||
cids = cids if cids else [] | ||
return await concept_graph_post(request, codeset_ids, cids, hide_vocabs, | ||
hide_nonstandard_concepts, verbose) | ||
return await concept_graph_post(request, codeset_ids, cids, hide_vocabs, hide_nonstandard_concepts, verbose) | ||
|
||
|
||
# todo: match return of concept_graph() | ||
@router.post("/concept-graph") | ||
async def concept_graph_post( | ||
request: Request, codeset_ids: List[int], cids: Union[List[int], None] = [], | ||
hide_vocabs = ['RxNorm Extension'], hide_nonstandard_concepts=False, verbose = VERBOSE, | ||
) -> List[List[Union[int, Any]]]: | ||
|
||
) -> Dict: | ||
"""Return concept graph via HTTP POST""" | ||
rpt = Api_logger() | ||
try: | ||
await rpt.start_rpt(request, params={'codeset_ids': codeset_ids, 'cids': cids}) | ||
|
@@ -140,6 +129,7 @@ async def concept_graph( | |
# TODO: @Siggie: move below to frontend | ||
# noinspection PyPep8Naming | ||
def MOVE_TO_FRONT_END(): | ||
"""Move to front end""" | ||
hidden_by_voc = {} | ||
hide_if_over = 50 | ||
tree = [] # this used to be indented tree stuff that we're no longer using | ||
|
@@ -190,6 +180,10 @@ def filter_concepts( | |
|
||
# noinspection PyPep8Naming | ||
def get_missing_in_between_nodes(G: nx.DiGraph, subgraph_nodes: Union[List[int], Set[int]], verbose=VERBOSE) -> Set: | ||
"""Find any missing nodes that exist in a subgraph. | ||
|
||
This can happen when concept set expansions are indirect subtrees of other expansions. | ||
For""" | ||
missing_in_between_nodes = set() | ||
missing_in_between_nodes_tmp = set() | ||
subgraph_nodes = set(subgraph_nodes) | ||
|
@@ -249,23 +243,6 @@ def get_missing_in_between_nodes(G: nx.DiGraph, subgraph_nodes: Union[List[int], | |
return missing_in_between_nodes | ||
|
||
|
||
def test_get_missing_in_between_nodes( | ||
whole_graph_edges=None, non_subgraph_nodes=None, expected_missing_in_between_nodes=None, subgraph_nodes=None, | ||
fail=True, verbose=False | ||
): | ||
# add code to load whole REL_GRAPH | ||
G = DiGraph(whole_graph_edges) | ||
subgraph_nodes = subgraph_nodes or set(G.nodes) - set(non_subgraph_nodes) | ||
missing_in_between_nodes = get_missing_in_between_nodes(G, subgraph_nodes, verbose=verbose) | ||
if fail: | ||
assert missing_in_between_nodes == set(expected_missing_in_between_nodes) | ||
else: | ||
if missing_in_between_nodes == set(expected_missing_in_between_nodes): | ||
print(f"passed with {missing_in_between_nodes}") | ||
else: | ||
print(f"expected {expected_missing_in_between_nodes}, got {missing_in_between_nodes}") | ||
|
||
|
||
@router.get("/wholegraph") | ||
def wholegraph(): | ||
"""Get subgraph edges for the whole graph""" | ||
|
@@ -375,23 +352,26 @@ def create_rel_graphs(save_to_pickle: bool) -> DiGraph: | |
return G # , Gu | ||
|
||
|
||
def load_relationship_graph(save_if_not_exists=True): | ||
def is_graph_up_to_date(graph_path: str = GRAPH_PATH) -> bool: | ||
"""Determine if the networkx relationship_graph derived from OMOP vocab is current""" | ||
voc_last_updated = dp.parse(check_db_status_var('last_refreshed_vocab_tables')) | ||
graph_last_updated = datetime.fromtimestamp(os.path.getmtime(graph_path)) | ||
if voc_last_updated.tzinfo and not graph_last_updated.tzinfo: # if one has timezone, both need | ||
graph_last_updated = graph_last_updated.replace(tzinfo=voc_last_updated.tzinfo) | ||
return graph_last_updated > voc_last_updated | ||
|
||
|
||
# noinspection PyPep8Naming for_G | ||
def load_relationship_graph(graph_path: str = GRAPH_PATH, update_if_outdated=True, save=True) -> DiGraph: | ||
"""Load relationship graph from disk""" | ||
timer = get_timer('./load_relationship_graph') | ||
timer(f'loading {GRAPH_PATH}') | ||
if os.path.isfile(GRAPH_PATH): | ||
with open(GRAPH_PATH, 'rb') as pickle_file: | ||
# noinspection PyPep8Naming | ||
G = pickle.load(pickle_file) | ||
# while True: | ||
# try: | ||
# chunk = pickle.load(pickle_file) | ||
# G.add_edges_from(chunk) | ||
# except EOFError: | ||
# break # End of file reached | ||
timer(f'loading {graph_path}') | ||
up_to_date = True if not update_if_outdated else is_graph_up_to_date(GRAPH_PATH) | ||
if os.path.isfile(graph_path) and up_to_date: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If not true, triggers creation / save of new graph. |
||
with open(graph_path, 'rb') as pickle_file: | ||
G: DiGraph = pickle.load(pickle_file) | ||
else: | ||
# noinspection PyPep8Naming | ||
G = create_rel_graphs(save_if_not_exists) | ||
G: DiGraph = create_rel_graphs(save) | ||
timer('done') | ||
return G | ||
|
||
|
@@ -406,20 +386,7 @@ def load_relationship_graph(save_if_not_exists=True): | |
# import builtins | ||
# builtins.DONT_LOAD_GRAPH = True | ||
import builtins | ||
|
||
if hasattr(builtins, 'DONT_LOAD_GRAPH') and builtins.DONT_LOAD_GRAPH: | ||
warnings.warn('not loading relationship graph') | ||
else: | ||
REL_GRAPH = load_relationship_graph(save_if_not_exists=True) | ||
# REVERSE_GRAPH = REL_GRAPH.reverse() | ||
|
||
# G_ROOTS = set([n for n in REL_GRAPH.nodes if REL_GRAPH.in_degree(n) == 0]) | ||
# def distance_to_root(G, node): | ||
# n = node | ||
# d = 0 | ||
# for p in G.predecessors(node): | ||
# if n in G_ROOTS: | ||
# return d | ||
# d += 1 | ||
# n = p | ||
# raise Exception(f"can't find root for {node}") | ||
REL_GRAPH = load_relationship_graph() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Activates: On server start. Since this file gets imported during server start, this line runs. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. TODO: Activate on cron or hitting of home route |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FYI:
is_graph_up_to_date()