diff --git a/peregrine/resources/submission/__init__.py b/peregrine/resources/submission/__init__.py index b8a35008..ab61b7be 100644 --- a/peregrine/resources/submission/__init__.py +++ b/peregrine/resources/submission/__init__.py @@ -17,7 +17,6 @@ from peregrine.utils import jsonify_check_errors - def get_open_project_ids(): """ List project ids corresponding to projects with ``availability_type == @@ -70,7 +69,7 @@ def set_read_access_projects(): open_project_ids = get_open_project_ids() flask.g.read_access_projects.extend(open_project_ids) - + @peregrine.blueprints.blueprint.route('/graphql', methods=['POST']) @peregrine.auth.set_global_user def root_graphql_query(): @@ -99,16 +98,17 @@ def root_graphql_query(): if export_format == 'bdbag': data, code = return_data if code == 200: - if peregrine.utils.contain_node_with_category(data.json,'data_file') == False: - return flask.jsonify({ 'errors': 'No data_file node'}), 400 - res = peregrine.utils.json2tbl(json.loads(data.data),'', "_" ) + if peregrine.utils.contain_node_with_category(data.json, 'data_file') == False: + return flask.jsonify({'errors': 'No data_file node'}), 400 + res = peregrine.utils.json2tbl(json.loads(data.data), '', "_") tsv = peregrine.utils.dicts2tsv(res) return flask.Response(tsv, mimetype='text/tab-separated-values'), code else: return data, code else: return return_data - #return flask.jsonify({'data': 'Format not supported !!!'}), 400 + # return flask.jsonify({'data': 'Format not supported !!!'}), 400 + def get_introspection_query(): """ @@ -137,37 +137,38 @@ def root_graphql_schema_query(): ) ) -@peregrine.blueprints.blueprint.route('/export', methods=['POST']) -def get_manifest(): - """ - Creates and returns a manifest based on the filters pased on - to this endpoint - parameters: - - name: filters - in: graphql result in json format - description: Filters to be applied when generating the manifest - :return: A manifest that the user can use to download the files in there - """ - payload = peregrine.utils.parse_request_json() - export_data = payload.get('export_data') - bag_path = payload.get('bag_path') - - if(bag_path is None): - return flask.jsonify({'bag_path': None, 'errors': 'bag_path is required!!!'}), 400 - - if peregrine.utils.contain_node_with_category(export_data,'data_file') == False: - return flask.jsonify({ 'errors': 'No data_file node'}), 400 - - res = peregrine.utils.json2tbl(export_data,'', "_" ) - tsv = peregrine.utils.dicts2tsv(res) - - bag_info = {'organization': 'CDIS', - 'data_type': 'TOPMed', - 'date_created': datetime.date.today().isoformat()} - args = dict( - bag_path=bag_path, - bag_info=bag_info, - payload=res) - peregrine.utils.create_bdbag(**args) # bag is a compressed file - return flask.jsonify({'data': res}), 200 +# @peregrine.blueprints.blueprint.route('/export', methods=['POST']) +# def get_manifest(): +# """ +# Creates and returns a manifest based on the filters pased on +# to this endpoint +# parameters: +# - name: filters +# in: graphql result in json format +# description: Filters to be applied when generating the manifest +# :return: A manifest that the user can use to download the files in there +# """ +# payload = peregrine.utils.parse_request_json() +# export_data = payload.get('export_data') +# bag_path = payload.get('bag_path') + +# if(bag_path is None): +# return flask.jsonify({'bag_path': None, 'errors': 'bag_path is required!!!'}), 400 + +# if peregrine.utils.contain_node_with_category(export_data, 'data_file') == False: +# return flask.jsonify({'errors': 'No data_file node'}), 400 + +# res = peregrine.utils.json2tbl(export_data, '', "_") +# tsv = peregrine.utils.dicts2tsv(res) + +# bag_info = {'organization': 'CDIS', +# 'data_type': 'TOPMed', +# 'date_created': datetime.date.today().isoformat()} +# args = dict( +# bag_path=bag_path, +# bag_info=bag_info, +# payload=res) +# peregrine.utils.create_bdbag(**args) # bag is a compressed file + +# return flask.jsonify({'data': res}), 200 diff --git a/peregrine/utils/json2csv.py b/peregrine/utils/json2csv.py index 7e02d83a..5cf35206 100644 --- a/peregrine/utils/json2csv.py +++ b/peregrine/utils/json2csv.py @@ -59,26 +59,29 @@ def dicts2tsv(dict_list): output string """ tsv = "" - row = [] - for k in sorted(dict_list[0]): - k = k.replace('_data_','') - tsv = tsv + "{}\t".format(k) - tsv = tsv[:-1] + "\n" + + header_set = set() + + for dict_row in dict_list: + header_set.update(dict_row.keys()) + + for h in sorted(header_set): + h = h.replace('_data_', '') + tsv = tsv + "{}\t".format(h) nrow = 0 for dict_row in dict_list: - row=[] - for k in sorted(dict_row): - if dict_row[k]: - tsv = tsv + "{}\t".format(dict_row[k]) + row = [] + for h in sorted(header_set): + if dict_row[h]: + tsv = tsv + "{}\t".format(dict_row[h]) else: - tsv = tsv + "None\t" - tsv = tsv[:-1] + "\n" - nrow = nrow + 1 - if nrow >= 1000: - break - - return tsv + tsv = tsv + "None\t" + tsv = tsv[:-1] + "\n" + nrow = nrow + 1 + if nrow >= 1000: + break + return tsv def join(table_list, L, index, row): ''' @@ -100,7 +103,8 @@ def join(table_list, L, index, row): newrow.update(item) join(table_list, L, index + 1, newrow) -def json2tbl(json,prefix,delem): + +def json2tbl(json, prefix, delem): ''' Args: json: graphQL output JSON @@ -113,16 +117,16 @@ def json2tbl(json,prefix,delem): L = [] if isinstance(json, list) and json != []: for l in json: - L += (json2tbl(l,prefix,delem)) + L += (json2tbl(l, prefix, delem)) return L if isinstance(json, dict): - #handle dictionary + # handle dictionary table_list = [] for k in json.keys(): table = json2tbl(json[k], prefix + delem + k, delem) table_list.append(table) - join(table_list,L,0,{}) + join(table_list, L, 0, {}) else: L.append({prefix: json}) return L diff --git a/peregrine/utils/payload.py b/peregrine/utils/payload.py index b6da5b1e..44858a90 100644 --- a/peregrine/utils/payload.py +++ b/peregrine/utils/payload.py @@ -30,6 +30,7 @@ from peregrine.resources.submission.graphql.node import get_fields + def get_external_proxies(): """Get any custom proxies set in the config. @@ -216,6 +217,7 @@ def get_introspection_query(): f = open(os.path.join(cur_dir, 'graphql', 'introspection_query.txt'), 'r') return f.read() + def json_dumps_formatted(data): """Return json string with standard format.""" dump = json.dumps( @@ -223,6 +225,7 @@ def json_dumps_formatted(data): ) return dump.encode('utf-8') + def jsonify_check_errors(data_and_errors, error_code=400): """ TODO @@ -247,6 +250,7 @@ def get_variables(payload): errors = ['Unable to parse variables', str(e)] return variables, errors + def contain_node_with_category(json, category): ''' Check if JSON object contain `category` keys or not @@ -256,12 +260,12 @@ def contain_node_with_category(json, category): True: if JSON object contains data_file key False: otherwise ''' - keys_list=[] + keys_list = [] get_keys(json, keys_list) ns_field = get_fields() category_map = {} - for (k,v) in ns_field.iteritems(): + for (k, v) in ns_field.iteritems(): category_map[v] = k._dictionary['category'] for key in keys_list: @@ -272,6 +276,7 @@ def contain_node_with_category(json, category): pass return False + def get_keys(payload, keys_list): ''' Get all keys of JSON object and update to the keys_list @@ -280,4 +285,4 @@ def get_keys(payload, keys_list): keys_list += payload.keys() map(lambda x: get_keys(x, keys_list), payload.values()) elif isinstance(payload, list): - map(lambda x: get_keys(x, keys_list), payload) \ No newline at end of file + map(lambda x: get_keys(x, keys_list), payload) diff --git a/tests/graphql/test_graphql.py b/tests/graphql/test_graphql.py index 9bf4bd54..657c2f45 100644 --- a/tests/graphql/test_graphql.py +++ b/tests/graphql/test_graphql.py @@ -19,11 +19,12 @@ DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') path = '/v0/submission/graphql' -export_path = '/v0/submission/export' +#export_path = '/v0/submission/export' # ====================================================================== # Fixtures + @pytest.fixture def graphql_client(client, submitter): def execute(query, variables={}): @@ -85,6 +86,7 @@ def failed_upload_transaction(client, submitter, pg_driver_clean): # ====================================================================== # Tests + def post_example_entities_together( client, pg_driver_clean, submitter, data_fnames=data_fnames): path = BLGSP_PATH @@ -103,6 +105,7 @@ def put_example_entities_together(client, pg_driver_clean, submitter): data.append(json.loads(f.read())) return client.put(path, headers=submitter, data=json.dumps(data)) + def put_cgci(client, auth=None): path = '/v0/submission' data = json.dumps({ @@ -112,6 +115,7 @@ def put_cgci(client, auth=None): r = client.put(path, headers=auth, data=data) return r + def put_cgci_blgsp(client, auth=None): put_cgci(client, auth=auth) path = '/v0/submission/CGCI/' @@ -171,6 +175,7 @@ def test_unauthorized_graphql_query(client, submitter, pg_driver_clean, cgci_blg })) assert r.status_code == 403, r.data + def test_fragment(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) r = client.post(path, headers=submitter, data=json.dumps({ @@ -621,6 +626,7 @@ def test_auth_counts(client, submitter, pg_driver_clean, cgci_blgsp): with pg_driver_clean.session_scope(): assert r.json['data']['_case_count'] == 0 + def test_transaction_logs(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) r = client.post(path, headers=submitter, data=json.dumps({ @@ -629,11 +635,11 @@ def test_transaction_logs(client, submitter, pg_driver_clean, cgci_blgsp): assert r.json == { "data": { "transaction_log": [{ - 'project_id': 'CGCI-BLGSP', 'submitter': None + 'project_id': 'CGCI-BLGSP', 'submitter': None }] } } - + def test_auth_transaction_logs(client, submitter, pg_driver_clean, cgci_blgsp): utils.reset_transactions(pg_driver_clean) @@ -652,7 +658,7 @@ def test_with_path_to(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope(): case_sub_id = pg_driver_clean.nodes(models.Case).path('samples')\ - .first().submitter_id + .first().submitter_id r = client.post(path, headers=submitter, data=json.dumps({ 'query': """ query Test {{ @@ -905,6 +911,7 @@ def test_catch_language_error(client, submitter, pg_driver_clean, cgci_blgsp): )] } + @pytest.mark.skip(reason='must rewrite query') def test_filter_empty_prop_list( client, submitter, pg_driver_clean, cgci_blgsp, monkeypatch): @@ -1027,8 +1034,6 @@ def test_read_group_with_path_to_case( } - - def test_tx_logs_async_fields(pg_driver_clean, graphql_client, cgci_blgsp): assert graphql_client("""{ tx_log: transaction_log { @@ -1100,6 +1105,7 @@ def test_tx_logs_committable(pg_driver_clean, graphql_client, cgci_blgsp, mock_t } } + @pytest.mark.skip(reason='we have different data') def test_tx_logs_deletion(pg_driver_clean, graphql_client, cgci_blgsp, failed_deletion_transaction): response = graphql_client("""{ @@ -1221,24 +1227,25 @@ def test_tx_log_comprehensive_query_failed_deletion( assert response.status_code == 200, response.data assert 'errors' not in response.json, response.data + def test_json2tbl(): - + data = {"project": [ - { - "code": "BLGSP", + { + "code": "BLGSP", "experiments": [], "id": "daa208a7-f57a-562c-a04a-7a7c77542c98", "name": "Burkitt Lymphoma Genome Sequencing Project", "programs": [ - { - "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", - "name": "DEV" - } + { + "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", + "name": "DEV" + } ] - }] + }] } - res = json2tbl(data,'','_') + res = json2tbl(data, '', '_') assert len(res) == 1 assert res[0]['_project_programs_id'] == 'f6bd2676-33f6-5671-ac2f-38aa1ceedcd8' @@ -1248,58 +1255,56 @@ def test_json2tbl(): assert res[0]['_project_name'] == 'Burkitt Lymphoma Genome Sequencing Project' - - def test_export(client, submitter, pg_driver_clean): - data = {"data": { - "project": [ - { - "code": "BLGSP", - "experiments": [], - "id": "daa208a7-f57a-562c-a04a-7a7c77542c98", - "name": "Burkitt Lymphoma Genome Sequencing Project", - "programs": [ - { - "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", - "name": "DEV" - } - ] - }, - { - "code": "test", - "experiments": [ + data = {"data": { + "project": [ { - "id": "8307c663-af58-4b01-8fd0-9b63f55dac10" + "code": "BLGSP", + "experiments": [], + "id": "daa208a7-f57a-562c-a04a-7a7c77542c98", + "name": "Burkitt Lymphoma Genome Sequencing Project", + "programs": [ + { + "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", + "name": "DEV" + } + ] }, - { - "id": "f6e00607-7f38-49ea-b64b-c45ccf0ff990" - } - ], - "id": "a77f549b-c74b-563e-80bb-570b5a4dde88", - "name": "test", - "programs": [ - { - "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", - "name": "DEV" - } - ] - }, - { - "code": "open", - "experiments": [], - "id": "9a2fe4bf-5484-5fe4-b882-0d61ecade7cc", - "name": "Open access Project", - "programs": [ - { - "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", - "name": "DEV" + { + "code": "test", + "experiments": [ + { + "id": "8307c663-af58-4b01-8fd0-9b63f55dac10" + }, + { + "id": "f6e00607-7f38-49ea-b64b-c45ccf0ff990" + } + ], + "id": "a77f549b-c74b-563e-80bb-570b5a4dde88", + "name": "test", + "programs": [ + { + "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", + "name": "DEV" + } + ] + }, + { + "code": "open", + "experiments": [], + "id": "9a2fe4bf-5484-5fe4-b882-0d61ecade7cc", + "name": "Open access Project", + "programs": [ + { + "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", + "name": "DEV" + } + ] } - ] - } - ] - } - } - res = json2tbl(data,'','_') + ] + } + } + res = json2tbl(data, '', '_') assert len(res) == 4 assert res[0]['_data_project_programs_name'] == 'DEV' @@ -1308,159 +1313,59 @@ def test_export(client, submitter, pg_driver_clean): assert res[1]['_data_project_programs_id'] == 'f6bd2676-33f6-5671-ac2f-38aa1ceedcd8' assert res[1]['_data_project_name'] == 'test' -def test_export(client, submitter, monkeypatch): - data = json.dumps({'bag_path':'manifest_bag', - "export_data": - {"data": { - "project": [ - { - "code": "BLGSP", - "experiments": [], - "id": "daa208a7-f57a-562c-a04a-7a7c77542c98", - "name": "Burkitt Lymphoma Genome Sequencing Project", - "programs": [ - { - "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", - "name": "DEV" - } - ] - }, - { - "code": "test", - "experiments": [ - { - "id": "8307c663-af58-4b01-8fd0-9b63f55dac10" - }, - { - "id": "f6e00607-7f38-49ea-b64b-c45ccf0ff990" - } - ], - "id": "a77f549b-c74b-563e-80bb-570b5a4dde88", - "name": "test", - "programs": [ - { - "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", - "name": "DEV" - } - ] - }, - { - "code": "open", - "experiments": [], - "id": "9a2fe4bf-5484-5fe4-b882-0d61ecade7cc", - "name": "Open access Project", - "programs": [ - { - "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", - "name": "DEV" - } - ] - } - ] - } - } - }) - monkeypatch.setattr( - peregrine.utils,'contain_node_with_category', - lambda x,y: True - ) - - r = client.post(export_path, headers=submitter, data=data) - assert r.status_code == 200 - - #tear down - # os.remove('manifest_bag.zip') - # shutil.rmtree('manifest_bag') - - -def test_export_with_no_data_file_node(client, submitter,monkeypatch): - - data = json.dumps({'bag_path':'manifest_bag', - "export_data": - {"data": { - "a": [{"project_id": "CGCI-BLGSP"}], - "b": [], - "c": [], - "d": [{"project_id": "CGCI-BLGSP"}] - } - }}) - - monkeypatch.setattr( - peregrine.utils,'contain_node_with_category', - lambda x,y: False - ) - - r = client.post(export_path, headers=submitter, data=data) - assert r.status_code == 400 - -def test_export_bagit( - client, submitter, pg_driver_clean, cgci_blgsp, put_tcga_brca): - data = json.dumps({ - 'format': 'bdbag', - 'path': 'manifest_bag', - 'query': """ - { - valid: project (project_id: "CGCI-BLGSP") { ...f } - invalid: project (project_id: "TCGA-TEST") { ...f } - multiple: project (project_id: ["TCGA-BRCA", "CGCI-BLGSP"]) { ...f } - } - fragment f on project { project_id code } - """ - }) - res = client.post(path, headers=submitter, data=data) - print res.data - assert res.status_code == 200 - assert os.path.exists('manifest_bag.zip') - assert os.path.exists('manifest_bag/bag-info.txt') - assert os.path.exists('manifest_bag/bagit.txt') - assert os.path.exists('manifest_bag/data/manifest.tsv') - assert os.path.exists('manifest_bag/manifest-sha512.txt') - assert os.path.exists('manifest_bag/tagmanifest-sha512.txt') - assert os.path.exists('manifest_bag/manifest-sha256.txt') - assert os.path.exists('manifest_bag/tagmanifest-sha256.txt') - - #tear down - os.remove('manifest_bag.zip') - shutil.rmtree('manifest_bag') - - - -def test_export_bagit(monkeypatch,client, submitter, pg_driver_clean, cgci_blgsp): - post_example_entities_together(client, pg_driver_clean, submitter) - - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """{ - a: project (project_id: "CGCI-BLGSP") { project_id } - b: project (project_id: "FAKE") { project_id } - c: project (project_id: "FAKE_PROJECT") { project_id } - d: project (project_id: ["CGCI-BLGSP", "FAKE", "FAKE-PROJECT"]) { - project_id - } - }""" - })) - - monkeypatch.setattr( - peregrine.utils,'contain_node_with_category', - lambda x,y: True - ) - - ret_data = json.loads(r.data) - - data = json.dumps({'bag_path':'manifest_bag', - 'export_data': ret_data}) - - r = client.post(export_path, headers=submitter, data=data) - assert r.status_code == 200 - assert os.path.exists('manifest_bag.zip') - assert os.path.exists('manifest_bag/bag-info.txt') - assert os.path.exists('manifest_bag/bagit.txt') - assert os.path.exists('manifest_bag/data/manifest.tsv') - assert os.path.exists('manifest_bag/manifest-sha512.txt') - assert os.path.exists('manifest_bag/tagmanifest-sha512.txt') - assert os.path.exists('manifest_bag/manifest-sha256.txt') - assert os.path.exists('manifest_bag/tagmanifest-sha256.txt') - - #tear down - os.remove('manifest_bag.zip') - shutil.rmtree('manifest_bag') +# def test_export_with_no_data_file_node(client, submitter, monkeypatch): + +# data = json.dumps({'bag_path': 'manifest_bag', +# "export_data": +# {"data": { +# "a": [{"project_id": "CGCI-BLGSP"}], +# "b": [], +# "c": [], +# "d": [{"project_id": "CGCI-BLGSP"}] +# } +# }}) + +# monkeypatch.setattr( +# peregrine.utils, 'contain_node_with_category', +# lambda x, y: False +# ) + +# r = client.post(export_path, headers=submitter, data=data) +# assert r.status_code == 400 + + +# def test_export_bagit( +# client, submitter, monkeypatch): +# data = json.dumps({ +# 'format': 'bdbag', +# 'bag_path': 'manifest_bag', +# 'query': """ +# { +# valid: project (project_id: "CGCI-BLGSP") { ...f } +# invalid: project (project_id: "TCGA-TEST") { ...f } +# multiple: project (project_id: ["TCGA-BRCA", "CGCI-BLGSP"]) { ...f } +# } +# fragment f on project { project_id code } +# """ +# }) +# monkeypatch.setattr( +# peregrine.utils, 'contain_node_with_category', +# lambda x, y: True +# ) + +# res = client.post(export_path, headers=submitter, data=data) +# print res.data +# assert res.status_code == 200 +# assert os.path.exists('manifest_bag.zip') +# assert os.path.exists('manifest_bag/bag-info.txt') +# assert os.path.exists('manifest_bag/bagit.txt') +# assert os.path.exists('manifest_bag/data/manifest.tsv') +# assert os.path.exists('manifest_bag/manifest-sha512.txt') +# assert os.path.exists('manifest_bag/tagmanifest-sha512.txt') +# assert os.path.exists('manifest_bag/manifest-sha256.txt') +# assert os.path.exists('manifest_bag/tagmanifest-sha256.txt') + +# # tear down +# os.remove('manifest_bag.zip') +# shutil.rmtree('manifest_bag')