From 5cd47aab35e38c4819dcc4630a8e85ad44fa414d Mon Sep 17 00:00:00 2001 From: Giang Bui Date: Wed, 28 Mar 2018 15:25:52 -0500 Subject: [PATCH] chore(bagit): change method names change dicts2tbl -> dicts2tsv change table -> tsv --- peregrine/resources/submission/__init__.py | 36 ------------- peregrine/utils/__init__.py | 2 +- peregrine/utils/json2csv.py | 60 ++++++++++++---------- peregrine/utils/pybdbag.py | 4 +- tests/graphql/test_graphql.py | 8 +-- 5 files changed, 41 insertions(+), 69 deletions(-) diff --git a/peregrine/resources/submission/__init__.py b/peregrine/resources/submission/__init__.py index af9461e5..0f79b063 100644 --- a/peregrine/resources/submission/__init__.py +++ b/peregrine/resources/submission/__init__.py @@ -148,39 +148,3 @@ def root_graphql_schema_query(): graphql.execute_query(get_introspection_query()) ) ) - -# @peregrine.blueprints.blueprint.route('/export', methods=['POST']) -# def get_manifest(): -# """ -# Creates and returns a manifest based on the filters pased on -# to this endpoint -# parameters: -# - name: filters -# in: graphql result in json format -# description: Filters to be applied when generating the manifest -# :return: A manifest that the user can use to download the files in there -# """ -# payload = peregrine.utils.parse_request_json() -# export_data = payload.get('export_data') -# bag_path = payload.get('bag_path') - -# if(bag_path is None): -# return flask.jsonify({'bag_path': None, 'errors': 'bag_path is required!!!'}), 400 - -# if peregrine.utils.contain_node_with_category(export_data, 'data_file') == False: -# return flask.jsonify({'errors': 'No data_file node'}), 400 - -# res = peregrine.utils.json2tbl(export_data, '', "_") -# tsv = peregrine.utils.dicts2tsv(res) - -# bag_info = {'organization': 'CDIS', -# 'data_type': 'TOPMed', -# 'date_created': datetime.date.today().isoformat()} -# args = dict( -# bag_path=bag_path, -# bag_info=bag_info, -# payload=res) -# # bag is a compressed file -# return 
peregrine.utils.create_bdbag(**args), 200 - -# # return flask.jsonify({'data': res}), 200 \ No newline at end of file diff --git a/peregrine/utils/__init__.py b/peregrine/utils/__init__.py index 0e7bef9b..158931b7 100644 --- a/peregrine/utils/__init__.py +++ b/peregrine/utils/__init__.py @@ -1,5 +1,5 @@ from .payload import get_variables,jsonify_check_errors,parse_request_json,get_keys,contain_node_with_category from .pybdbag import create_bdbag from .scheduling import AsyncPool -from .json2csv import flatten_obj,json2tbl, dicts2tsv, flatten_json +from .json2csv import flatten_obj,json2tsv, dicts2tsv, flatten_json from .response import format_response diff --git a/peregrine/utils/json2csv.py b/peregrine/utils/json2csv.py index 3d959b01..d91af132 100644 --- a/peregrine/utils/json2csv.py +++ b/peregrine/utils/json2csv.py @@ -53,11 +53,14 @@ def dicts2tsv(dict_list): """ Convert the list of dictionary to tsv format. Each element of the list represent a row in tsv + Args: - dict_list: list of dictionary - Return: - output string + dict_list(list): list of dictionary + + Returns: + output(str): string in tsv format """ + tsv = "" header_set = set() @@ -84,58 +87,63 @@ def dicts2tsv(dict_list): return tsv -def join(table_list, L, index, row): +def join(tsv_list, L, index, row): ''' - Join sub tables to generate a big table + Join list of sub tsv to generate a big tsv Args: - table_list: list of tables. Each table is represented by a list of dictionary - L: joined table that is iteratively updated - index: int - row: dictionary + tsv_list(list): list of tables or tvs. 
Each element is represented by a list of dictionary + L(list): joined table that is iteratively updated + index(int): the index of the table to be joined + row(dict): the current joining row - Return: None + Returns: None ''' - if index == len(table_list): + if index == len(tsv_list): L.append(row) else: - for item in table_list[index]: + for item in tsv_list[index]: newrow = row.copy() newrow.update(item) - join(table_list, L, index + 1, newrow) + join(tsv_list, L, index + 1, newrow) -def json2tbl(json, prefix, delem): +def json2tsv(json, prefix, delem): ''' + Convert json file to tsv format + Args: - json: graphQL output JSON - prefix: prefix string - delem: delimitter - Output: list of dictionary representing a table. Each item in the list represent a row data. - each row is a dictionary with column name key and value at that position + json(json): graphQL output JSON + prefix(str): prefix string + delem(char): delimiter, e.g. '\t' + Returns: + list of dictionaries representing a tsv file. Each item in the list represents a row of data. 
+ each row is a dictionary with column name key and value at that position ''' + L = [] if isinstance(json, list) and json != []: for l in json: - L += (json2tbl(l, prefix, delem)) + L += (json2tsv(l, prefix, delem)) return L if isinstance(json, dict): # handle dictionary - table_list = [] + tsv_list = [] for k in json.keys(): - table = json2tbl(json[k], prefix + delem + k, delem) - table_list.append(table) + tsv = json2tsv(json[k], prefix + delem + k, delem) + tsv_list.append(tsv) - join(table_list, L, 0, {}) + join(tsv_list, L, 0, {}) else: L.append({prefix: json}) return L + def flatten_json(json, prefix, delem): data = json['data'] res = {} for key, val in data.iteritems(): - res[key] = json2tbl({key:val}, prefix, delem) - + res[key] = json2tsv({key: val}, prefix, delem) + return res diff --git a/peregrine/utils/pybdbag.py b/peregrine/utils/pybdbag.py index 605361ea..6c94ea80 100644 --- a/peregrine/utils/pybdbag.py +++ b/peregrine/utils/pybdbag.py @@ -16,7 +16,7 @@ def create_bdbag(bag_info, payload, max_row=1000): bag_info: bdbag info payload(json): resutl of graphql given a query max_row(int): the row limitation of tsv files - Return: + Returns: the path of bdbag zip file """ @@ -34,7 +34,7 @@ def create_bdbag(bag_info, payload, max_row=1000): for dict_row in json_data: header_set.update(dict_row.keys()) - with open(bag_path + '/data/' + node_name+ '.tsv', 'w') as tsvfile: + with open(bag_path + '/data/' + node_name + '.tsv', 'w') as tsvfile: writer = csv.writer(tsvfile, delimiter='\t') row = [] for h in header_set: diff --git a/tests/graphql/test_graphql.py b/tests/graphql/test_graphql.py index 657c2f45..598c0363 100644 --- a/tests/graphql/test_graphql.py +++ b/tests/graphql/test_graphql.py @@ -11,7 +11,7 @@ from tests.graphql import utils from tests.graphql.utils import data_fnames -from peregrine.utils import json2tbl +from peregrine.utils import json2tsv BLGSP_PATH = '/v0/submission/CGCI/BLGSP/' BRCA_PATH = '/v0/submission/TCGA/BRCA/' @@ -1228,7 +1228,7 
@@ def test_tx_log_comprehensive_query_failed_deletion( assert 'errors' not in response.json, response.data -def test_json2tbl(): +def test_json2tsv(): data = {"project": [ { @@ -1245,7 +1245,7 @@ def test_json2tbl(): }] } - res = json2tbl(data, '', '_') + res = json2tsv(data, '', '_') assert len(res) == 1 assert res[0]['_project_programs_id'] == 'f6bd2676-33f6-5671-ac2f-38aa1ceedcd8' @@ -1304,7 +1304,7 @@ def test_export(client, submitter, pg_driver_clean): ] } } - res = json2tbl(data, '', '_') + res = json2tsv(data, '', '_') assert len(res) == 4 assert res[0]['_data_project_programs_name'] == 'DEV'