From 803ec621088bf467ea4e16d88438dd2e4a9b1857 Mon Sep 17 00:00:00 2001 From: s-paquette Date: Thu, 14 Jul 2016 16:35:38 -0700 Subject: [PATCH 1/9] -> Fixed a bug where null values weren't reporting count numbers for cohort filters -> Fixed a related bug where null values couldn't actually be queried for numeric range sets like BMI or age at diagnosis --- cohorts/views.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cohorts/views.py b/cohorts/views.py index 0856872a..31c1443d 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -349,11 +349,14 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): col_name = key_map[key] if should_be_queried: - count_query_set.append( - ('SELECT DISTINCT ms.%s, IF(counts.count IS NULL,0,counts.count) AS count FROM %s AS ms ' + - 'LEFT JOIN (SELECT DISTINCT %s, COUNT(1) as count FROM %s GROUP BY %s) as counts ON ' + - 'counts.%s = ms.%s;') % (col_name, 'metadata_samples', col_name, tmp_table_name, col_name, - col_name, col_name,) + count_query_set.append((""" + SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS + count + FROM %s AS ms + LEFT JOIN (SELECT DISTINCT %s, COUNT(1) as count FROM %s GROUP BY %s) AS counts + ON counts.%s = ms.%s OR (counts.%s IS NULL AND ms.%s IS NULL); + """) + % (col_name, col_name, col_name, 'metadata_samples', col_name, tmp_table_name, col_name, col_name, col_name, col_name, col_name) ) for query_str in count_query_set: From a20dda8985df042541d783bde88b145dddb7283c Mon Sep 17 00:00:00 2001 From: s-paquette Date: Thu, 14 Jul 2016 19:52:10 -0700 Subject: [PATCH 2/9] -> Filter selection will now only filter other counts, not the selected filters' counts -> If no filters or cohort are selected, queries run directly against the source table (because they're going to anyways) --- cohorts/views.py | 117 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 87 insertions(+), 30 deletions(-) diff --git a/cohorts/views.py b/cohorts/views.py index 31c1443d..5dae38e9 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -300,35 +300,75 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): if not obj['tables']: filters[key]['tables'].append('metadata_samples') - tmp_table_name = "filtered_samples_tmp_" + user.id.__str__() + "_" + make_id(6) - make_tmp_table_str = "CREATE TEMPORARY TABLE " + tmp_table_name + " AS SELECT * " - params_tuple = () counts = {} cursor = db.cursor() - # TODO: This should take into account variable tables; may require a UNION statement or similar - if cohort_id is not None: - make_tmp_table_str += "FROM cohorts_samples cs " - make_tmp_table_str += "JOIN metadata_samples ms ON ms.SampleBarcode = cs.sample_id " - make_tmp_table_str += "WHERE cohort_id = %s " - params_tuple += (cohort_id,) - else: - make_tmp_table_str += "FROM metadata_samples ms " + # We need to perform 2 sets of queries: one with each filter excluded from the others, against the full + # metadata_samples/cohort JOIN, and one where all filters are applied to create a temporart table, and + # attributes *outside* that set are counted + + unfiltered_attr = [] + exclusionary_filter = {} + where_clause = None + filtered_join = 'metadata_samples ms' + + for attr in valid_attrs: + if attr not in filters: + unfiltered_attr.append(attr.split(':')[-1]) key_map = table_key_map['metadata_samples'] if 'metadata_samples' in table_key_map else False + # construct the WHERE clauses needed if filters.__len__() > 0: + if cohort_id is not None: + filtered_join = 'cohorts_samples cs JOIN metadata_samples ms ON cs.sample_id = ms.SampleBarcode' filter_copy = copy.deepcopy(filters) where_clause = build_where_clause(filter_copy, alt_key_map=key_map) - make_tmp_table_str += "WHERE " if cohort_id is None else "AND " - make_tmp_table_str += where_clause['query_str'] - params_tuple += where_clause['value_tuple'] + for filter in filters: + filter_copy = copy.deepcopy(filters) + del filter_copy[filter] + if filter_copy.__len__() <= 0: + exclusionary_filter[filter.split(':')[-1]] = {'query_str': None, 'value_tuple': None} + else: + ex_where_clause = build_where_clause(filter_copy, alt_key_map=key_map) + if cohort_id is not None: + ex_where_clause['query_str'] += ' AND cs.cohort_id=%s' + ex_where_clause['value_tuple'] += (cohort_id,) + exclusionary_filter[filter.split(':')[-1]] = ex_where_clause + + print >> sys.stdout, filters.__str__() + print >> sys.stdout, unfiltered_attr.__str__() + + query_table_name = None + tmp_table_name = None + + # Only create the temporary table if there's something to actually filter down the + # source table - otherwise, it's just a waste of time and memory + if unfiltered_attr.__len__() > 0 and (filters.__len__() > 0 or cohort_id is not None): + # TODO: This should take into account variable tables; may require a UNION statement or similar + tmp_table_name = "filtered_samples_tmp_" + user.id.__str__() + "_" + make_id(6) + query_table_name = tmp_table_name + make_tmp_table_str = "CREATE TEMPORARY TABLE " + tmp_table_name + " AS SELECT * " + + if cohort_id is not None: + make_tmp_table_str += "FROM cohorts_samples cs " + make_tmp_table_str += "JOIN metadata_samples ms ON ms.SampleBarcode = cs.sample_id " + make_tmp_table_str += "WHERE cohort_id = %s " + params_tuple += (cohort_id,) + else: + make_tmp_table_str += "FROM metadata_samples ms " - make_tmp_table_str += ";" + if filters.__len__() > 0: + make_tmp_table_str += "WHERE " if cohort_id is None else "AND " + make_tmp_table_str += where_clause['query_str'] + params_tuple += where_clause['value_tuple'] - cursor.execute(make_tmp_table_str, params_tuple) + make_tmp_table_str += ";" + cursor.execute(make_tmp_table_str, params_tuple) + else: + query_table_name = 'metadata_samples' count_query_set = [] @@ -349,18 +389,34 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): col_name = key_map[key] if should_be_queried: - count_query_set.append((""" - SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS - count - FROM %s AS ms - LEFT JOIN (SELECT DISTINCT %s, COUNT(1) as count FROM %s GROUP BY %s) AS counts - ON counts.%s = ms.%s OR (counts.%s IS NULL AND ms.%s IS NULL); - """) - % (col_name, col_name, col_name, 'metadata_samples', col_name, tmp_table_name, col_name, col_name, col_name, col_name, col_name) - ) - - for query_str in count_query_set: - cursor.execute(query_str) + if col_name in unfiltered_attr: + count_query_set.append({'query_str':(""" + SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS + count + FROM %s ms + LEFT JOIN (SELECT DISTINCT %s, COUNT(1) as count FROM %s GROUP BY %s) AS counts + ON counts.%s = ms.%s OR (counts.%s IS NULL AND ms.%s IS NULL); + """) % (col_name, col_name, col_name, 'metadata_samples', col_name, query_table_name, col_name, col_name, col_name, col_name, col_name), + 'params': None, }) + else: + subquery = filtered_join + ((' WHERE ' + exclusionary_filter[col_name]['query_str']) if exclusionary_filter[col_name]['query_str'] else ' ') + print >> sys.stdout, subquery + count_query_set.append({'query_str':(""" + SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS + count + FROM %s AS ms + LEFT JOIN (SELECT DISTINCT %s, COUNT(1) as count FROM %s GROUP BY %s) AS counts + ON counts.%s = ms.%s OR (counts.%s IS NULL AND ms.%s IS NULL); + """) % (col_name, col_name, col_name, 'metadata_samples', col_name, subquery, col_name, col_name, col_name, col_name, col_name), + 'params': exclusionary_filter[col_name]['value_tuple']}) + + for query in count_query_set: + print >> sys.stdout, query.__str__() + if 'params' in query and query['params'] is not None: + cursor.execute(query['query_str'], query['params']) + else: + cursor.execute(query['query_str']) + colset = cursor.description col_headers = [] if colset is not None: @@ -373,8 +429,9 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): counts[col_headers[0]]['counts'][row[0]] = int(row[1]) counts[col_headers[0]]['total'] += int(row[1]) - # Drop the temporary table - cursor.execute("DROP TEMPORARY TABLE IF EXISTS " + tmp_table_name); + # Drop the temporary table, if we made one + if tmp_table_name is not None: + cursor.execute(("DROP TEMPORARY TABLE IF EXISTS %s") % tmp_table_name) counts_and_total['participants'] = get_participant_count(filters, cohort_id) counts_and_total['counts'] = [] From 69e16c2878a9ab07f9f1c62914d54de668087edb Mon Sep 17 00:00:00 2001 From: s-paquette Date: Thu, 14 Jul 2016 20:14:09 -0700 Subject: [PATCH 3/9] -> Bugfixes for cohort filtering --- cohorts/views.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/cohorts/views.py b/cohorts/views.py index 5dae38e9..eee679a6 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -330,13 +330,19 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): filter_copy = copy.deepcopy(filters) del filter_copy[filter] if filter_copy.__len__() <= 0: - exclusionary_filter[filter.split(':')[-1]] = {'query_str': None, 'value_tuple': None} + ex_where_clause = {'query_str': None, 'value_tuple': None} else: ex_where_clause = build_where_clause(filter_copy, alt_key_map=key_map) - if cohort_id is not None: - ex_where_clause['query_str'] += ' AND cs.cohort_id=%s' - ex_where_clause['value_tuple'] += (cohort_id,) - exclusionary_filter[filter.split(':')[-1]] = ex_where_clause + if cohort_id is not None: + if ex_where_clause['query_str'] is not None: + ex_where_clause['query_str'] += ' AND ' + else: + ex_where_clause['query_str'] = '' + ex_where_clause['value_tuple'] = () + ex_where_clause['query_str'] += ' cs.cohort_id=%s ' + ex_where_clause['value_tuple'] += (cohort_id,) + + exclusionary_filter[filter.split(':')[-1]] = ex_where_clause print >> sys.stdout, filters.__str__() print >> sys.stdout, unfiltered_attr.__str__() @@ -355,7 +361,7 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): if cohort_id is not None: make_tmp_table_str += "FROM cohorts_samples cs " make_tmp_table_str += "JOIN metadata_samples ms ON ms.SampleBarcode = cs.sample_id " - make_tmp_table_str += "WHERE cohort_id = %s " + make_tmp_table_str += "WHERE cs.cohort_id = %s " params_tuple += (cohort_id,) else: make_tmp_table_str += "FROM metadata_samples ms " @@ -366,6 +372,7 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): params_tuple += where_clause['value_tuple'] make_tmp_table_str += ";" + print >> sys.stdout, make_tmp_table_str cursor.execute(make_tmp_table_str, params_tuple) else: query_table_name = 'metadata_samples' @@ -429,6 +436,8 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): counts[col_headers[0]]['counts'][row[0]] = int(row[1]) counts[col_headers[0]]['total'] += int(row[1]) + print >> sys.stdout, counts.__str__() + # Drop the temporary table, if we made one if tmp_table_name is not None: cursor.execute(("DROP TEMPORARY TABLE IF EXISTS %s") % tmp_table_name) From 0a07bab0b2098a55ab51488b5834ddaa9421fe4b Mon Sep 17 00:00:00 2001 From: s-paquette Date: Thu, 14 Jul 2016 20:32:42 -0700 Subject: [PATCH 4/9] -> Bugfixes for cohort filtering --- cohorts/views.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/cohorts/views.py b/cohorts/views.py index eee679a6..9fc00bf9 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -167,24 +167,25 @@ def get_filter_values(): ''' Begin metadata counting methods ''' # TODO: needs to be refactored to use other samples tables -def get_participant_count(filter="", cohort_id=None): +def get_participant_and_sample_count(filter="", cohort_id=None): db = get_sql_connection() cursor = None + counts = {} try: cursor = db.cursor(MySQLdb.cursors.DictCursor) param_tuple = () - query_str = "SELECT COUNT(DISTINCT ParticipantBarcode) AS participant_count " + query_str_lead = "SELECT COUNT(DISTINCT %s) AS %s " if cohort_id is not None: - query_str += "FROM cohorts_samples cs JOIN metadata_samples ms ON ms.SampleBarcode = cs.sample_id " + query_str = "FROM cohorts_samples cs JOIN metadata_samples ms ON ms.SampleBarcode = cs.sample_id " query_str += "WHERE cs.cohort_id = %s " param_tuple += (cohort_id,) else: - query_str += "FROM metadata_samples ms " + query_str = "FROM metadata_samples ms " if filter.__len__() > 0: where_clause = build_where_clause(filter) @@ -192,12 +193,17 @@ def get_participant_count(filter="", cohort_id=None): query_str += where_clause['query_str'] param_tuple += where_clause['value_tuple'] - cursor.execute(query_str, param_tuple) + cursor.execute((query_str_lead % ('ParticipantBarcode', 'participant_count')) + query_str, param_tuple) for row in cursor.fetchall(): - count = row['participant_count'] + counts['participant_count'] = row['participant_count'] + + cursor.execute((query_str_lead % ('SampleBarcode', 'sample_count')) + query_str, param_tuple) - return count + for row in cursor.fetchall(): + counts['sample_count'] = row['sample_count'] + + return counts except Exception as e: print traceback.format_exc() @@ -442,9 +448,11 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): if tmp_table_name is not None: cursor.execute(("DROP TEMPORARY TABLE IF EXISTS %s") % tmp_table_name) - counts_and_total['participants'] = get_participant_count(filters, cohort_id) + sample_and_participant_counts = get_participant_and_sample_count(filters, cohort_id); + + counts_and_total['participants'] = sample_and_participant_counts['participant_count'] + counts_and_total['total'] = sample_and_participant_counts['sample_count'] counts_and_total['counts'] = [] - counts_and_total['total'] = 0 for key, feature in valid_attrs.items(): value_list = [] @@ -467,8 +475,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): value_list.append({'value': str(value), 'count': count}) counts_and_total['counts'].append({'name': feature['name'], 'values': value_list, 'id': key, 'total': feature['total']}) - if feature['total'] > counts_and_total['total']: - counts_and_total['total'] = feature['total'] return counts_and_total From f06c3e4baa58c24ea16e61cdca27b69b08e9cae0 Mon Sep 17 00:00:00 2001 From: s-paquette Date: Thu, 14 Jul 2016 20:46:09 -0700 Subject: [PATCH 5/9] -> Bugfixes for cohort filtering --- cohorts/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cohorts/views.py b/cohorts/views.py index 9fc00bf9..4768e84b 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -312,7 +312,7 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): cursor = db.cursor() # We need to perform 2 sets of queries: one with each filter excluded from the others, against the full - # metadata_samples/cohort JOIN, and one where all filters are applied to create a temporart table, and + # metadata_samples/cohort JOIN, and one where all filters are applied to create a temporary table, and # attributes *outside* that set are counted unfiltered_attr = [] From fd20d5822909f6143e3080e54ad749ff6fe90901 Mon Sep 17 00:00:00 2001 From: s-paquette Date: Thu, 14 Jul 2016 20:56:55 -0700 Subject: [PATCH 6/9] -> Bugfixes for cohort filtering --- cohorts/views.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cohorts/views.py b/cohorts/views.py index 4768e84b..f0efd5a3 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -350,9 +350,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): exclusionary_filter[filter.split(':')[-1]] = ex_where_clause - print >> sys.stdout, filters.__str__() - print >> sys.stdout, unfiltered_attr.__str__() - query_table_name = None tmp_table_name = None @@ -378,7 +375,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): params_tuple += where_clause['value_tuple'] make_tmp_table_str += ";" - print >> sys.stdout, make_tmp_table_str cursor.execute(make_tmp_table_str, params_tuple) else: query_table_name = 'metadata_samples' @@ -424,7 +420,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): 'params': exclusionary_filter[col_name]['value_tuple']}) for query in count_query_set: - print >> sys.stdout, query.__str__() if 'params' in query and query['params'] is not None: cursor.execute(query['query_str'], query['params']) else: @@ -442,8 +437,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): counts[col_headers[0]]['counts'][row[0]] = int(row[1]) counts[col_headers[0]]['total'] += int(row[1]) - print >> sys.stdout, counts.__str__() - # Drop the temporary table, if we made one if tmp_table_name is not None: cursor.execute(("DROP TEMPORARY TABLE IF EXISTS %s") % tmp_table_name) From 1ca1d184ff12e2e5d12570c93d8558a91171c4fa Mon Sep 17 00:00:00 2001 From: s-paquette Date: Fri, 15 Jul 2016 12:02:20 -0700 Subject: [PATCH 7/9] -> Create a cohort tmp table to prevent querying of source tables when possible --- cohorts/views.py | 59 +++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/cohorts/views.py b/cohorts/views.py index f0efd5a3..94bde40a 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -318,7 +318,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): unfiltered_attr = [] exclusionary_filter = {} where_clause = None - filtered_join = 'metadata_samples ms' for attr in valid_attrs: if attr not in filters: @@ -328,8 +327,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): # construct the WHERE clauses needed if filters.__len__() > 0: - if cohort_id is not None: - filtered_join = 'cohorts_samples cs JOIN metadata_samples ms ON cs.sample_id = ms.SampleBarcode' filter_copy = copy.deepcopy(filters) where_clause = build_where_clause(filter_copy, alt_key_map=key_map) for filter in filters: @@ -350,24 +347,28 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): exclusionary_filter[filter.split(':')[-1]] = ex_where_clause - query_table_name = None - tmp_table_name = None + base_table = 'metadata_samples' + tmp_cohort_table = None + tmp_filter_table = None - # Only create the temporary table if there's something to actually filter down the - # source table - otherwise, it's just a waste of time and memory - if unfiltered_attr.__len__() > 0 and (filters.__len__() > 0 or cohort_id is not None): + # If there is a cohort, make a temporary table based on it and make it the base table + if cohort_id is not None: + tmp_cohort_table = "cohort_tmp_" + user.id.__str__() + "_" + make_id(6) + base_table = tmp_cohort_table + make_cohort_table_str = """ + CREATE TEMPORARY TABLE %s AS SELECT * + FROM cohorts_samples cs + JOIN metadata_samples ms ON ms.SampleBarcode = cs.sample_id + """ % tmp_cohort_table + make_cohort_table_str += 'WHERE cs.cohort_id = %s;' + params_tuple += (cohort_id,) + cursor.execute(make_cohort_table_str, params_tuple) + + # If there are filters, create a temporary table filtered off the base table + if unfiltered_attr.__len__() > 0 and filters.__len__() > 0: # TODO: This should take into account variable tables; may require a UNION statement or similar - tmp_table_name = "filtered_samples_tmp_" + user.id.__str__() + "_" + make_id(6) - query_table_name = tmp_table_name - make_tmp_table_str = "CREATE TEMPORARY TABLE " + tmp_table_name + " AS SELECT * " - - if cohort_id is not None: - make_tmp_table_str += "FROM cohorts_samples cs " - make_tmp_table_str += "JOIN metadata_samples ms ON ms.SampleBarcode = cs.sample_id " - make_tmp_table_str += "WHERE cs.cohort_id = %s " - params_tuple += (cohort_id,) - else: - make_tmp_table_str += "FROM metadata_samples ms " + tmp_filter_table = "filtered_samples_tmp_" + user.id.__str__() + "_" + make_id(6) + make_tmp_table_str = 'CREATE TEMPORARY TABLE %s AS SELECT * FROM %s ' % (tmp_filter_table, base_table,) if filters.__len__() > 0: make_tmp_table_str += "WHERE " if cohort_id is None else "AND " @@ -376,8 +377,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): make_tmp_table_str += ";" cursor.execute(make_tmp_table_str, params_tuple) - else: - query_table_name = 'metadata_samples' count_query_set = [] @@ -400,20 +399,18 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): if should_be_queried: if col_name in unfiltered_attr: count_query_set.append({'query_str':(""" - SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS - count + SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS count FROM %s ms LEFT JOIN (SELECT DISTINCT %s, COUNT(1) as count FROM %s GROUP BY %s) AS counts ON counts.%s = ms.%s OR (counts.%s IS NULL AND ms.%s IS NULL); - """) % (col_name, col_name, col_name, 'metadata_samples', col_name, query_table_name, col_name, col_name, col_name, col_name, col_name), + """) % (col_name, col_name, col_name, 'metadata_samples', col_name, tmp_filter_table, col_name, col_name, col_name, col_name, col_name), 'params': None, }) else: - subquery = filtered_join + ((' WHERE ' + exclusionary_filter[col_name]['query_str']) if exclusionary_filter[col_name]['query_str'] else ' ') + subquery = base_table + ((' WHERE ' + exclusionary_filter[col_name]['query_str']) if exclusionary_filter[col_name]['query_str'] else ' ') print >> sys.stdout, subquery count_query_set.append({'query_str':(""" - SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS - count - FROM %s AS ms + SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS count + FROM %s ms LEFT JOIN (SELECT DISTINCT %s, COUNT(1) as count FROM %s GROUP BY %s) AS counts ON counts.%s = ms.%s OR (counts.%s IS NULL AND ms.%s IS NULL); """) % (col_name, col_name, col_name, 'metadata_samples', col_name, subquery, col_name, col_name, col_name, col_name, col_name), @@ -437,9 +434,9 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): counts[col_headers[0]]['counts'][row[0]] = int(row[1]) counts[col_headers[0]]['total'] += int(row[1]) - # Drop the temporary table, if we made one - if tmp_table_name is not None: - cursor.execute(("DROP TEMPORARY TABLE IF EXISTS %s") % tmp_table_name) + # Drop the temporary tables + if tmp_cohort_table is not None: cursor.execute(("DROP TEMPORARY TABLE IF EXISTS %s") % tmp_cohort_table) + if tmp_filter_table is not None: cursor.execute(("DROP TEMPORARY TABLE IF EXISTS %s") % tmp_filter_table) sample_and_participant_counts = get_participant_and_sample_count(filters, cohort_id); From 0d24d2c3ff72e1fd7a3e061debba63a76d06fc5b Mon Sep 17 00:00:00 2001 From: s-paquette Date: Fri, 15 Jul 2016 13:44:22 -0700 Subject: [PATCH 8/9] -> 1466 bugfix to a behavior upgrade --- cohorts/views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cohorts/views.py b/cohorts/views.py index 94bde40a..2f400990 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -348,6 +348,7 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): exclusionary_filter[filter.split(':')[-1]] = ex_where_clause base_table = 'metadata_samples' + filter_table = 'metadata_samples' tmp_cohort_table = None tmp_filter_table = None @@ -368,6 +369,7 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): if unfiltered_attr.__len__() > 0 and filters.__len__() > 0: # TODO: This should take into account variable tables; may require a UNION statement or similar tmp_filter_table = "filtered_samples_tmp_" + user.id.__str__() + "_" + make_id(6) + filter_table = tmp_filter_table make_tmp_table_str = 'CREATE TEMPORARY TABLE %s AS SELECT * FROM %s ' % (tmp_filter_table, base_table,) if filters.__len__() > 0: @@ -403,7 +405,7 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): FROM %s ms LEFT JOIN (SELECT DISTINCT %s, COUNT(1) as count FROM %s GROUP BY %s) AS counts ON counts.%s = ms.%s OR (counts.%s IS NULL AND ms.%s IS NULL); - """) % (col_name, col_name, col_name, 'metadata_samples', col_name, tmp_filter_table, col_name, col_name, col_name, col_name, col_name), + """) % (col_name, col_name, col_name, 'metadata_samples', col_name, filter_table, col_name, col_name, col_name, col_name, col_name), 'params': None, }) else: subquery = base_table + ((' WHERE ' + exclusionary_filter[col_name]['query_str']) if exclusionary_filter[col_name]['query_str'] else ' ') From b853573ae101892c93534c2063cfef8f3b1a4af8 Mon Sep 17 00:00:00 2001 From: s-paquette Date: Fri, 15 Jul 2016 20:34:24 -0700 Subject: [PATCH 9/9] -> Bugfix --- cohorts/models.py | 1 - cohorts/views.py | 17 ++++------------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/cohorts/models.py b/cohorts/models.py index 3c898f09..64ed0818 100755 --- a/cohorts/models.py +++ b/cohorts/models.py @@ -162,7 +162,6 @@ def get_creation_filters(self): else: cohort = None - print >> sys.stdout, "Creation filters: " + filter_list.__str__() return filter_list ''' diff --git a/cohorts/views.py b/cohorts/views.py index 2f400990..db77c3a4 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -336,14 +336,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): ex_where_clause = {'query_str': None, 'value_tuple': None} else: ex_where_clause = build_where_clause(filter_copy, alt_key_map=key_map) - if cohort_id is not None: - if ex_where_clause['query_str'] is not None: - ex_where_clause['query_str'] += ' AND ' - else: - ex_where_clause['query_str'] = '' - ex_where_clause['value_tuple'] = () - ex_where_clause['query_str'] += ' cs.cohort_id=%s ' - ex_where_clause['value_tuple'] += (cohort_id,) exclusionary_filter[filter.split(':')[-1]] = ex_where_clause @@ -362,8 +354,7 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): JOIN metadata_samples ms ON ms.SampleBarcode = cs.sample_id """ % tmp_cohort_table make_cohort_table_str += 'WHERE cs.cohort_id = %s;' - params_tuple += (cohort_id,) - cursor.execute(make_cohort_table_str, params_tuple) + cursor.execute(make_cohort_table_str, (cohort_id,)) # If there are filters, create a temporary table filtered off the base table if unfiltered_attr.__len__() > 0 and filters.__len__() > 0: @@ -373,12 +364,13 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): make_tmp_table_str = 'CREATE TEMPORARY TABLE %s AS SELECT * FROM %s ' % (tmp_filter_table, base_table,) if filters.__len__() > 0: - make_tmp_table_str += "WHERE " if cohort_id is None else "AND " - make_tmp_table_str += where_clause['query_str'] + make_tmp_table_str += 'WHERE %s ' % where_clause['query_str'] params_tuple += where_clause['value_tuple'] make_tmp_table_str += ";" cursor.execute(make_tmp_table_str, params_tuple) + else: + filter_table = base_table count_query_set = [] @@ -409,7 +401,6 @@ def count_metadata(user, cohort_id=None, sample_ids=None, filters=None): 'params': None, }) else: subquery = base_table + ((' WHERE ' + exclusionary_filter[col_name]['query_str']) if exclusionary_filter[col_name]['query_str'] else ' ') - print >> sys.stdout, subquery count_query_set.append({'query_str':(""" SELECT DISTINCT IF(ms.%s IS NULL,'None',ms.%s) AS %s, IF(counts.count IS NULL,0,counts.count) AS count FROM %s ms