From 5cd47aab35e38c4819dcc4630a8e85ad44fa414d Mon Sep 17 00:00:00 2001 From: Giang Bui Date: Wed, 28 Mar 2018 15:25:52 -0500 Subject: [PATCH] chore(bagit): change method names change dicts2tbl -> dicts2tsv change table -> tsv --- peregrine/resources/submission/__init__.py | 36 ------------- peregrine/utils/__init__.py | 2 +- peregrine/utils/json2csv.py | 60 ++++++++++++---------- peregrine/utils/pybdbag.py | 4 +- tests/graphql/test_graphql.py | 8 +-- 5 files changed, 41 insertions(+), 69 deletions(-) diff --git a/peregrine/resources/submission/__init__.py b/peregrine/resources/submission/__init__.py index af9461e5..0f79b063 100644 --- a/peregrine/resources/submission/__init__.py +++ b/peregrine/resources/submission/__init__.py @@ -148,39 +148,3 @@ def root_graphql_schema_query(): graphql.execute_query(get_introspection_query()) ) ) - -# @peregrine.blueprints.blueprint.route('/export', methods=['POST']) -# def get_manifest(): -# """ -# Creates and returns a manifest based on the filters pased on -# to this endpoint -# parameters: -# - name: filters -# in: graphql result in json format -# description: Filters to be applied when generating the manifest -# :return: A manifest that the user can use to download the files in there -# """ -# payload = peregrine.utils.parse_request_json() -# export_data = payload.get('export_data') -# bag_path = payload.get('bag_path') - -# if(bag_path is None): -# return flask.jsonify({'bag_path': None, 'errors': 'bag_path is required!!!'}), 400 - -# if peregrine.utils.contain_node_with_category(export_data, 'data_file') == False: -# return flask.jsonify({'errors': 'No data_file node'}), 400 - -# res = peregrine.utils.json2tbl(export_data, '', "_") -# tsv = peregrine.utils.dicts2tsv(res) - -# bag_info = {'organization': 'CDIS', -# 'data_type': 'TOPMed', -# 'date_created': datetime.date.today().isoformat()} -# args = dict( -# bag_path=bag_path, -# bag_info=bag_info, -# payload=res) -# # bag is a compressed file -# return 
peregrine.utils.create_bdbag(**args), 200 - -# # return flask.jsonify({'data': res}), 200 \ No newline at end of file diff --git a/peregrine/utils/__init__.py b/peregrine/utils/__init__.py index 0e7bef9b..158931b7 100644 --- a/peregrine/utils/__init__.py +++ b/peregrine/utils/__init__.py @@ -1,5 +1,5 @@ from .payload import get_variables,jsonify_check_errors,parse_request_json,get_keys,contain_node_with_category from .pybdbag import create_bdbag from .scheduling import AsyncPool -from .json2csv import flatten_obj,json2tbl, dicts2tsv, flatten_json +from .json2csv import flatten_obj,json2tsv, dicts2tsv, flatten_json from .response import format_response diff --git a/peregrine/utils/json2csv.py b/peregrine/utils/json2csv.py index 3d959b01..d91af132 100644 --- a/peregrine/utils/json2csv.py +++ b/peregrine/utils/json2csv.py @@ -53,11 +53,14 @@ def dicts2tsv(dict_list): """ Convert the list of dictionary to tsv format. Each element of the list represent a row in tsv + Args: - dict_list: list of dictionary - Return: - output string + dict_list(list): list of dictionary + + Returns: + output(str): string in tsv format """ + tsv = "" header_set = set() @@ -84,58 +87,63 @@ def dicts2tsv(dict_list): return tsv -def join(table_list, L, index, row): +def join(tsv_list, L, index, row): ''' - Join sub tables to generate a big table + Join list of sub tsv to generate a big tsv Args: - table_list: list of tables. Each table is represented by a list of dictionary - L: joined table that is iteratively updated - index: int - row: dictionary + tsv_list(list): list of tables or tvs. 
Each element is represented by a list of dictionary + L(list): joined table that is iteratively updated + index(int): the index of the table to be joined + row(dict): the current joining row - Return: None + Returns: None ''' - if index == len(table_list): + if index == len(tsv_list): L.append(row) else: - for item in table_list[index]: + for item in tsv_list[index]: newrow = row.copy() newrow.update(item) - join(table_list, L, index + 1, newrow) + join(tsv_list, L, index + 1, newrow) -def json2tbl(json, prefix, delem): +def json2tsv(json, prefix, delem): ''' + Convert json file to tsv format + Args: - json: graphQL output JSON - prefix: prefix string - delem: delimitter - Output: list of dictionary representing a table. Each item in the list represent a row data. - each row is a dictionary with column name key and value at that position + json(json): graphQL output JSON + prefix(str): prefix string + delem(char): delimiter, e.g. '\t' + Returns: + list of dictionaries representing a tsv file. Each item in the list represents a row of data. 
+ each row is a dictionary with column name key and value at that position ''' + L = [] if isinstance(json, list) and json != []: for l in json: - L += (json2tbl(l, prefix, delem)) + L += (json2tsv(l, prefix, delem)) return L if isinstance(json, dict): # handle dictionary - table_list = [] + tsv_list = [] for k in json.keys(): - table = json2tbl(json[k], prefix + delem + k, delem) - table_list.append(table) + tsv = json2tsv(json[k], prefix + delem + k, delem) + tsv_list.append(tsv) - join(table_list, L, 0, {}) + join(tsv_list, L, 0, {}) else: L.append({prefix: json}) return L + def flatten_json(json, prefix, delem): data = json['data'] res = {} for key, val in data.iteritems(): - res[key] = json2tbl({key:val}, prefix, delem) - + res[key] = json2tsv({key: val}, prefix, delem) + return res diff --git a/peregrine/utils/pybdbag.py b/peregrine/utils/pybdbag.py index 605361ea..6c94ea80 100644 --- a/peregrine/utils/pybdbag.py +++ b/peregrine/utils/pybdbag.py @@ -16,7 +16,7 @@ def create_bdbag(bag_info, payload, max_row=1000): bag_info: bdbag info payload(json): resutl of graphql given a query max_row(int): the row limitation of tsv files - Return: + Returns: the path of bdbag zip file """ @@ -34,7 +34,7 @@ def create_bdbag(bag_info, payload, max_row=1000): for dict_row in json_data: header_set.update(dict_row.keys()) - with open(bag_path + '/data/' + node_name+ '.tsv', 'w') as tsvfile: + with open(bag_path + '/data/' + node_name + '.tsv', 'w') as tsvfile: writer = csv.writer(tsvfile, delimiter='\t') row = [] for h in header_set: diff --git a/tests/graphql/test_graphql.py b/tests/graphql/test_graphql.py index 657c2f45..598c0363 100644 --- a/tests/graphql/test_graphql.py +++ b/tests/graphql/test_graphql.py @@ -11,7 +11,7 @@ from tests.graphql import utils from tests.graphql.utils import data_fnames -from peregrine.utils import json2tbl +from peregrine.utils import json2tsv BLGSP_PATH = '/v0/submission/CGCI/BLGSP/' BRCA_PATH = '/v0/submission/TCGA/BRCA/' @@ -1228,7 +1228,7 
@@ def test_tx_log_comprehensive_query_failed_deletion( assert 'errors' not in response.json, response.data -def test_json2tbl(): +def test_json2tsv(): data = {"project": [ { @@ -1245,7 +1245,7 @@ def test_json2tbl(): }] } - res = json2tbl(data, '', '_') + res = json2tsv(data, '', '_') assert len(res) == 1 assert res[0]['_project_programs_id'] == 'f6bd2676-33f6-5671-ac2f-38aa1ceedcd8' @@ -1304,7 +1304,7 @@ def test_export(client, submitter, pg_driver_clean): ] } } - res = json2tbl(data, '', '_') + res = json2tsv(data, '', '_') assert len(res) == 4 assert res[0]['_data_project_programs_name'] == 'DEV'