From 0d6a5774ecd6db050333026fd0e87f32fe59c453 Mon Sep 17 00:00:00 2001 From: Giang Bui Date: Thu, 22 Mar 2018 23:21:10 -0500 Subject: [PATCH] feat(bagit): clean up the code --- peregrine/resources/submission/__init__.py | 89 +++++----------------- peregrine/utils/pybdbag.py | 9 ++- tests/graphql/test_graphql.py | 60 ++++----------- 3 files changed, 40 insertions(+), 118 deletions(-) diff --git a/peregrine/resources/submission/__init__.py b/peregrine/resources/submission/__init__.py index b25b3603..4fa08446 100644 --- a/peregrine/resources/submission/__init__.py +++ b/peregrine/resources/submission/__init__.py @@ -13,6 +13,8 @@ import peregrine.blueprints from . import graphql +from peregrine.utils import jsonify_check_errors + def get_open_project_ids(): @@ -67,51 +69,6 @@ def set_read_access_projects(): open_project_ids = get_open_project_ids() flask.g.read_access_projects.extend(open_project_ids) -# @peregrine.blueprints.blueprint.route('/graphql', methods=['POST']) -# @peregrine.auth.set_global_user -# def root_graphql_query(): -# """ -# Run a graphql query. -# """ -# # Short circuit if user is not recognized. Make sure that the list of -# # projects that the user has read access to is set. - -# print("root_graphql_query. Run a graphql query in resource/submission/__init__") -# try: -# set_read_access_projects() -# except peregrine.errors.AuthError: -# data = flask.jsonify({'data': {}, 'errors': ['Unauthorized query.']}) -# return data, 403 -# payload = peregrine.utils.parse_request_json() -# query = payload.get('query') -# export_format = payload.get('format') -# bag_path = payload.get('path') -# variables, errors = peregrine.utils.get_variables(payload) -# if errors: -# return flask.jsonify({'data': None, 'errors': errors}), 400 -# return_data = graphql.execute_query(query, variables) - -# import pdb; pdb.set_trace() - -# if export_format == 'bdbag': -# # if peregrine.utils.contain_node_with_category(return_data,'data_file') == False: -# # return flask.jsonify({ 'errors': 'No data_file node'}), 400 - -# import pdb; pdb.set_trace() - -# res = peregrine.utils.json2tbl(json.loads(json.dumps(return_data)),'', "_" ) -# bag_info = {'organization': 'CDIS', -# 'data_type': 'TOPMed', -# 'date_created': datetime.date.today().isoformat()} -# args = dict( -# bag_path=bag_path, -# bag_info=bag_info, -# payload=res) -# peregrine.utils.create_bdbag(**args) # bag is a compressed file -# return flask.jsonify({'data': res}), 200 -# else: -# return peregrine.utils.jsonify_check_errors(return_data) -# #return flask.jsonify({'data': 'Format not supported !!!'}), 400 @peregrine.blueprints.blueprint.route('/graphql', methods=['POST']) @peregrine.auth.set_global_user @@ -135,38 +92,30 @@ def root_graphql_query(): variables, errors = peregrine.utils.get_variables(payload) if errors: return flask.jsonify({'data': None, 'errors': errors}), 400 - - return_data= graphql.execute_query(query, variables) - import pdb; pdb.set_trace() - return_data = peregrine.utils.jsonify_check_errors(return_data) - - data, error = return_data + return_data = jsonify_check_errors(graphql.execute_query(query, variables)) if export_format == 'bdbag': - # if peregrine.utils.contain_node_with_category(return_data,'data_file') == False: - # return flask.jsonify({ 'errors': 'No data_file node'}), 400 - - - import pdb; pdb.set_trace() - - res = peregrine.utils.json2tbl(data.json),'', "_" ) - - import pdb; pdb.set_trace() - bag_info = {'organization': 'CDIS', - 'data_type': 'TOPMed', - 'date_created': datetime.date.today().isoformat()} - args = dict( - bag_path=bag_path, - bag_info=bag_info, - payload=res) - peregrine.utils.create_bdbag(**args) # bag is a compressed file - return flask.jsonify({'data': res}), 200 + data, code = return_data + if code == 200: + # if peregrine.utils.contain_node_with_category(data.json,'data_file') == False: + # return flask.jsonify({ 'errors': 'No data_file node'}), 400 + res = peregrine.utils.json2tbl(data.json,'', "_" ) + bag_info = {'organization': 'CDIS', + 'data_type': 'TOPMed', + 'date_created': datetime.date.today().isoformat()} + args = dict( + bag_path=bag_path, + bag_info=bag_info, + payload=res) + peregrine.utils.create_bdbag(**args) # bag is a compressed file + return flask.jsonify({'data': res}), code + else: + return data, code else: return return_data #return flask.jsonify({'data': 'Format not supported !!!'}), 400 - def get_introspection_query(): """ Load the graphql introspection query from its file. diff --git a/peregrine/utils/pybdbag.py b/peregrine/utils/pybdbag.py index 80715c74..7f709cee 100644 --- a/peregrine/utils/pybdbag.py +++ b/peregrine/utils/pybdbag.py @@ -1,4 +1,5 @@ import os +import copy import bagit import csv import zipfile @@ -22,14 +23,14 @@ def create_bdbag(bag_path, bag_info, payload, max_row=10000): for k in payload[0].keys(): k = k.replace('_data_','') row.append(k) - + header = copy.deepcopy(row) writer.writerow(row) nrow = 1 for row_dict in payload: row=[] - for (k,v) in row_dict.iteritems(): - if v: - row.append(v) + for h in header: + if row_dict['_data_'+h]: + row.append(row_dict['_data_'+h]) else: row.append('None') writer.writerow(row) diff --git a/tests/graphql/test_graphql.py b/tests/graphql/test_graphql.py index 4d2e8017..fd82b488 100644 --- a/tests/graphql/test_graphql.py +++ b/tests/graphql/test_graphql.py @@ -1304,47 +1304,7 @@ def put_cgci_blgsp(client, auth=None): # r = client.post(export_path, headers=submitter, data=data) # assert r.status_code == 400 -# def test_export_bagit(monkeypatch,client, submitter, pg_driver_clean, cgci_blgsp): -# post_example_entities_together(client, pg_driver_clean, submitter) - -# r = client.post(path, headers=submitter, data=json.dumps({ -# 'query': """{ -# a: project (project_id: "CGCI-BLGSP") { project_id } -# b: project (project_id: "FAKE") { project_id } -# c: project (project_id: "FAKE_PROJECT") { project_id } -# d: project (project_id: ["CGCI-BLGSP", "FAKE", "FAKE-PROJECT"]) { -# project_id -# } -# }""" -# })) - -# monkeypatch.setattr( -# peregrine.utils,'contain_node_with_category', -# lambda x,y: True -# ) - -# ret_data = json.loads(r.data) - -# data = json.dumps({'bag_path':'manifest_bag', -# 'export_data': ret_data}) - -# r = client.post(export_path, headers=submitter, data=data) -# assert r.status_code == 200 -# assert os.path.exists('manifest_bag.zip') -# assert os.path.exists('manifest_bag/bag-info.txt') -# assert os.path.exists('manifest_bag/bagit.txt') -# assert os.path.exists('manifest_bag/data/manifest.tsv') -# assert os.path.exists('manifest_bag/manifest-sha512.txt') -# assert os.path.exists('manifest_bag/tagmanifest-sha512.txt') -# assert os.path.exists('manifest_bag/manifest-sha256.txt') -# assert os.path.exists('manifest_bag/tagmanifest-sha256.txt') - -# #tear down -# os.remove('manifest_bag.zip') -# shutil.rmtree('manifest_bag') - - -def test_special_case_project_id( +def test_export_bagit( client, submitter, pg_driver_clean, cgci_blgsp, put_tcga_brca): data = json.dumps({ 'format': 'bdbag', @@ -1358,9 +1318,21 @@ def test_special_case_project_id( fragment f on project { project_id code } """ }) - r = client.post(path, headers=submitter, data=data) - print r.data - import pdb; pdb.set_trace() + res = client.post(path, headers=submitter, data=data) + print res.data + assert res.status_code == 200 + assert os.path.exists('manifest_bag.zip') + assert os.path.exists('manifest_bag/bag-info.txt') + assert os.path.exists('manifest_bag/bagit.txt') + assert os.path.exists('manifest_bag/data/manifest.tsv') + assert os.path.exists('manifest_bag/manifest-sha512.txt') + assert os.path.exists('manifest_bag/tagmanifest-sha512.txt') + assert os.path.exists('manifest_bag/manifest-sha256.txt') + assert os.path.exists('manifest_bag/tagmanifest-sha256.txt') + + #tear down + os.remove('manifest_bag.zip') + shutil.rmtree('manifest_bag')