chore(bagit): change method names
change dicts2tbl -> dicts2tsv
change table -> tsv
giangbui committed Mar 28, 2018
1 parent c368680 commit 5cd47aa
Showing 5 changed files with 41 additions and 69 deletions.
36 changes: 0 additions & 36 deletions peregrine/resources/submission/__init__.py
@@ -148,39 +148,3 @@ def root_graphql_schema_query():
graphql.execute_query(get_introspection_query())
)
)

-# @peregrine.blueprints.blueprint.route('/export', methods=['POST'])
-# def get_manifest():
-#     """
-#     Creates and returns a manifest based on the filters pased on
-#     to this endpoint
-#     parameters:
-#       - name: filters
-#         in: graphql result in json format
-#         description: Filters to be applied when generating the manifest
-#     :return: A manifest that the user can use to download the files in there
-#     """
-#     payload = peregrine.utils.parse_request_json()
-#     export_data = payload.get('export_data')
-#     bag_path = payload.get('bag_path')
-
-#     if(bag_path is None):
-#         return flask.jsonify({'bag_path': None, 'errors': 'bag_path is required!!!'}), 400
-
-#     if peregrine.utils.contain_node_with_category(export_data, 'data_file') == False:
-#         return flask.jsonify({'errors': 'No data_file node'}), 400
-
-#     res = peregrine.utils.json2tbl(export_data, '', "_")
-#     tsv = peregrine.utils.dicts2tsv(res)
-
-#     bag_info = {'organization': 'CDIS',
-#                 'data_type': 'TOPMed',
-#                 'date_created': datetime.date.today().isoformat()}
-#     args = dict(
-#         bag_path=bag_path,
-#         bag_info=bag_info,
-#         payload=res)
-#     # bag is a compressed file
-#     return peregrine.utils.create_bdbag(**args), 200
-
-#     # return flask.jsonify({'data': res}), 200
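For orientation, a minimal sketch of the flow the removed handler implemented, written against the post-rename helper names; the sample export_data is a made-up stand-in for a GraphQL result:

# Sketch only: mirrors the deleted /export handler above, using the
# renamed helpers; the sample data is hypothetical.
import peregrine.utils

export_data = {'project': [{'code': 'BLGSP', 'id': 'p-1'}]}  # hypothetical

rows = peregrine.utils.json2tsv(export_data, '', '_')  # flatten to row dicts
tsv = peregrine.utils.dicts2tsv(rows)                  # render rows as TSV
# The handler then passed the rows (plus bag_info metadata) to
# peregrine.utils.create_bdbag to produce a compressed bdbag.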
2 changes: 1 addition & 1 deletion peregrine/utils/__init__.py
@@ -1,5 +1,5 @@
from .payload import get_variables,jsonify_check_errors,parse_request_json,get_keys,contain_node_with_category
from .pybdbag import create_bdbag
from .scheduling import AsyncPool
-from .json2csv import flatten_obj,json2tbl, dicts2tsv, flatten_json
+from .json2csv import flatten_obj,json2tsv, dicts2tsv, flatten_json
from .response import format_response
60 changes: 34 additions & 26 deletions peregrine/utils/json2csv.py
@@ -53,11 +53,14 @@ def dicts2tsv(dict_list):
"""
Convert the list of dictionary to tsv format.
Each element of the list represent a row in tsv
Args:
dict_list: list of dictionary
Return:
output string
dict_list(list): list of dictionary
Returns:
output(str): string in tsv format
"""

tsv = ""

header_set = set()
@@ -84,58 +84,63 @@ def dicts2tsv(dict_list):
    return tsv


-def join(table_list, L, index, row):
+def join(tsv_list, L, index, row):
    '''
-    Join sub tables to generate a big table
+    Join list of sub tsvs to generate a big tsv
    Args:
-        table_list: list of tables. Each table is represented by a list of dictionary
-        L: joined table that is iteratively updated
-        index: int
-        row: dictionary
+        tsv_list(list): list of tables or tsvs. Each element is represented by a list of dictionaries
+        L(list): joined table that is iteratively updated
+        index(int): the index of the table to be joined
+        row(dict): the current joining row
-    Return: None
+    Returns: None
    '''
-    if index == len(table_list):
+    if index == len(tsv_list):
        L.append(row)
    else:
-        for item in table_list[index]:
+        for item in tsv_list[index]:
            newrow = row.copy()
            newrow.update(item)
-            join(table_list, L, index + 1, newrow)
+            join(tsv_list, L, index + 1, newrow)


-def json2tbl(json, prefix, delem):
+def json2tsv(json, prefix, delem):
    '''
    Convert json file to tsv format
    Args:
-        json: graphQL output JSON
-        prefix: prefix string
-        delem: delimitter
-    Output: list of dictionary representing a table. Each item in the list represent a row data.
-        each row is a dictionary with column name key and value at that position
+        json(json): graphQL output JSON
+        prefix(str): prefix string
+        delem(char): delimiter, e.g. '\t'
+    Returns:
+        list of dictionaries representing a tsv file. Each item in the list represents a row of data;
+        each row is a dictionary keyed by column name with the value at that position
    '''

    L = []
    if isinstance(json, list) and json != []:
        for l in json:
-            L += (json2tbl(l, prefix, delem))
+            L += (json2tsv(l, prefix, delem))
        return L
    if isinstance(json, dict):
        # handle dictionary
-        table_list = []
+        tsv_list = []
        for k in json.keys():
-            table = json2tbl(json[k], prefix + delem + k, delem)
-            table_list.append(table)
+            tsv = json2tsv(json[k], prefix + delem + k, delem)
+            tsv_list.append(tsv)

-        join(table_list, L, 0, {})
+        join(tsv_list, L, 0, {})
    else:
        L.append({prefix: json})
    return L


def flatten_json(json, prefix, delem):
    data = json['data']
    res = {}
    for key, val in data.iteritems():
-        res[key] = json2tbl({key:val}, prefix, delem)
+        res[key] = json2tsv({key: val}, prefix, delem)

    return res
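As a quick worked example of the renamed helpers (behavior traced by hand from the code above, not taken from project docs): sibling keys yield sub-tables that join combines as a cartesian product, and each leaf value is keyed by its underscore-delimited path.

from peregrine.utils import json2tsv, dicts2tsv

doc = {'a': 1, 'b': [{'c': 2}, {'c': 3}]}
rows = json2tsv(doc, '', '_')
# The single-row table for 'a' is cross-joined with the two-row
# sub-table under 'b':
# [{'_a': 1, '_b_c': 2}, {'_a': 1, '_b_c': 3}]
print(dicts2tsv(rows))  # renders the rows as tab-separated text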
4 changes: 2 additions & 2 deletions peregrine/utils/pybdbag.py
@@ -16,7 +16,7 @@ def create_bdbag(bag_info, payload, max_row=1000):
        bag_info: bdbag info
        payload(json): resutl of graphql given a query
        max_row(int): the row limitation of tsv files
-    Return:
+    Returns:
        the path of bdbag zip file
    """

@@ -34,7 +34,7 @@ def create_bdbag(bag_info, payload, max_row=1000):
    for dict_row in json_data:
        header_set.update(dict_row.keys())

-    with open(bag_path + '/data/' + node_name+ '.tsv', 'w') as tsvfile:
+    with open(bag_path + '/data/' + node_name + '.tsv', 'w') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t')
        row = []
        for h in header_set:
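The hunk is cut off here; for context, a plausible completion of the writer loop, assuming missing columns are written as blanks (an assumption, not the committed code):

# Hypothetical continuation of the truncated loop above: emit the header
# row built from the union of keys, then one line per row dict, writing
# '' for any column a given row lacks.
for h in header_set:
    row.append(h)
writer.writerow(row)
for dict_row in json_data:
    writer.writerow([dict_row.get(h, '') for h in header_set])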
8 changes: 4 additions & 4 deletions tests/graphql/test_graphql.py
@@ -11,7 +11,7 @@
from tests.graphql import utils
from tests.graphql.utils import data_fnames

-from peregrine.utils import json2tbl
+from peregrine.utils import json2tsv

BLGSP_PATH = '/v0/submission/CGCI/BLGSP/'
BRCA_PATH = '/v0/submission/TCGA/BRCA/'
@@ -1228,7 +1228,7 @@ def test_tx_log_comprehensive_query_failed_deletion(
    assert 'errors' not in response.json, response.data


-def test_json2tbl():
+def test_json2tsv():

data = {"project": [
{
@@ -1245,7 +1245,7 @@ def test_json2tbl():
        }]
    }

-    res = json2tbl(data, '', '_')
+    res = json2tsv(data, '', '_')

    assert len(res) == 1
    assert res[0]['_project_programs_id'] == 'f6bd2676-33f6-5671-ac2f-38aa1ceedcd8'
@@ -1304,7 +1304,7 @@ def test_export(client, submitter, pg_driver_clean):
            ]
        }
    }
-    res = json2tbl(data, '', '_')
+    res = json2tsv(data, '', '_')

    assert len(res) == 4
    assert res[0]['_data_project_programs_name'] == 'DEV'
