Skip to content

Commit

Permalink
remove validation
Browse files Browse the repository at this point in the history
  • Loading branch information
dippindots committed Dec 19, 2019
1 parent 58cbfd6 commit 297df89
Show file tree
Hide file tree
Showing 31 changed files with 42 additions and 417 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public static GeneticProfile loadGeneticProfile(File file) throws IOException, D
validateGenericAssay(geneticProfile, file);
geneticProfile.setGenericAssayType(geneticProfile.getOtherMetaDataField("generic_assay_type"));
// if genericAssayType is TREATMENT_RESPONSE, validate and set pivotThreshold and sortOrder
if (geneticProfile.getGenericAssayType() == "TREATMENT_RESPONSE") {
if (geneticProfile.getGenericAssayType().equals("TREATMENT_RESPONSE")) {
validateTreatmentResponse(geneticProfile, file);
geneticProfile.setPivotThreshold(Float.parseFloat(geneticProfile.getOtherMetaDataField("pivot_threshold_value")));
geneticProfile.setSortOrder(geneticProfile.getOtherMetaDataField("value_sort_order"));
Expand Down
161 changes: 2 additions & 159 deletions core/src/main/scripts/importer/validateData.py
Original file line number Diff line number Diff line change
Expand Up @@ -3837,8 +3837,7 @@ def parseFeatureColumns(self, nonsample_col_vals):
def onComplete(self):

def checkConsistencyFeatures(self):
"""This function validates whether the treatments in the treatment response files (IC50, EC50,
GI50, AUC, ...) are the same"""
"""This function validates whether the features in the data files are the same"""

# If the prior_validated_features_ids is not filled yet, fill it with the first file.
if self.get_prior_validated_feature_ids() is None:
Expand Down Expand Up @@ -3894,81 +3893,6 @@ def get_message_features_do_not_match(cls):
return "Gene sets column in score and p-value file are not equal. The same set of gene sets should be used in the score and p-value files for this study. Please ensure that all gene set id's of one file are present in the other gene set data file."


class TreatmentWiseFileValidator(MultipleDataFileValidator, metaclass=ABCMeta):
    """Base class for validators of treatment response data files.

    Groups the treatment response files of a study so that they can be
    checked for consistency with each other. Every validator for a
    treatment response data type should derive from this class.

    The ``prior_validated_*`` values are stored on this class: the static
    accessors below always read and write ``TreatmentWiseFileValidator``
    itself, regardless of which subclass or instance calls them.
    """

    # State carried over from previously validated files; None until set
    # through the matching set_prior_validated_* accessor.
    prior_validated_sample_ids = None
    prior_validated_feature_ids = None
    prior_validated_header = None

    REQUIRED_HEADERS = ['entity_stable_id']
    OPTIONAL_HEADERS = ['META:name', 'META:description', 'META:url']
    UNIQUE_COLUMNS = ['entity_stable_id', 'META:name']

    def parseFeatureColumns(self, nonsample_col_vals):
        """Warn about treatment name discrepancies, then defer to the parent parser.

        Returns whatever the parent class' ``parseFeatureColumns`` returns.
        """
        self.checkDifferentNameInDb(nonsample_col_vals)
        return super().parseFeatureColumns(nonsample_col_vals)

    def checkDifferentNameInDb(self, nonsample_col_vals):
        """Warn when a treatment name differs from the one known to the portal.

        Looks up the row's ``entity_stable_id`` in the portal's treatment
        map and logs a warning if a treatment is registered under that id
        with a name other than the row's ``META:name`` value. Nothing is
        checked when the file has no ``META:name`` column or when no
        treatment map is available.
        """
        columns = self.nonsample_cols
        if 'META:name' not in columns:
            return
        if self.portal.treatment_map is None:
            return

        stable_id = nonsample_col_vals[columns.index("entity_stable_id")]
        name_in_file = nonsample_col_vals[columns.index("META:name")]

        # None means this treatment has not been registered yet, in which
        # case there is nothing to compare against.
        known_treatment = self.portal.treatment_map.get(stable_id)
        if known_treatment is None:
            return

        name_in_db = known_treatment['name']
        if name_in_db != name_in_file:
            self.logger.warning(
                "Name `%s` for treatment `%s` is different from name "
                "`%s` present in the cBioPortal database. "
                "Treatment names in cBioPortal always reflect treatment names "
                "in the last imported study.",
                name_in_file, stable_id, name_in_db,
                extra={'line_number': self.line_number,
                       'cause': name_in_file})

    @staticmethod
    def get_prior_validated_header():
        """Return the header stored by ``set_prior_validated_header``."""
        return TreatmentWiseFileValidator.prior_validated_header

    @staticmethod
    def set_prior_validated_header(header_names):
        """Store ``header_names`` on the class."""
        TreatmentWiseFileValidator.prior_validated_header = header_names

    @staticmethod
    def get_prior_validated_feature_ids():
        """Return the feature ids stored by ``set_prior_validated_feature_ids``."""
        return TreatmentWiseFileValidator.prior_validated_feature_ids

    @staticmethod
    def set_prior_validated_feature_ids(feature_ids):
        """Store ``feature_ids`` on the class."""
        TreatmentWiseFileValidator.prior_validated_feature_ids = feature_ids

    @staticmethod
    def get_prior_validated_sample_ids():
        """Return the sample ids stored by ``set_prior_validated_sample_ids``."""
        return TreatmentWiseFileValidator.prior_validated_sample_ids

    @staticmethod
    def set_prior_validated_sample_ids(sample_ids):
        """Store ``sample_ids`` on the class."""
        TreatmentWiseFileValidator.prior_validated_sample_ids = sample_ids

    @classmethod
    def get_message_features_do_not_match(cls):
        """Return the error text used when treatment files list different features."""
        return (
            "Treatment feature columns (`entity_stable_id`, ...) in treatment "
            "profile data files are not identical. The same set of treatments "
            "should be used across the different treatment data files for this "
            "study. Please ensure that all entity stable id's of one file are "
            "present in all other treatment files."
        )


class GsvaScoreValidator(GsvaWiseFileValidator):

""" Validator for files containing scores per gene set from GSVA algorithm. The GSVA algorithm
Expand Down Expand Up @@ -4004,71 +3928,6 @@ def checkValue(self, value, col_index):
'cause': value})



class TreatmentValidator(TreatmentWiseFileValidator):

    """Validator for files containing treatment response values.

    Rules applied to every sample cell by ``checkValue``:

    * The cell must hold a finite number, optionally prefixed by ``>`` or
      ``<``; e.g. ``>n`` means that the treatment was ineffective at the
      highest tested concentration of n.
    * ``NA`` is allowed; it means the treatment was not tested on a sample.
    * An empty cell is an error; ``NA`` must be used instead.
    * A number without decimals that is not prefixed by ``>`` or ``<``
      triggers a warning: the value appears to be truncated but lacks a
      truncation indicator.

    NOTE(review): the original header comment described values as natural
    positive numbers (not 0), but zero and negative numbers are not
    rejected by the checks below -- confirm which is intended.
    """

    def _cell_context(self, value, col_index):
        """Return the logging ``extra`` dict that locates the offending cell."""
        return {'line_number': self.line_number,
                'column_number': col_index + 1,  # reported 1-based
                'cause': value}

    def checkValue(self, value, col_index):
        """Check a value in a sample column.

        Logs an error for empty, non-numeric, NaN or infinite cells and a
        warning for undecorated whole numbers. Nothing is returned; all
        findings are reported through ``self.logger``.

        value: raw cell text as read from the data file.
        col_index: zero-based index of the column the cell belongs to.
        """
        stripped_value = value.strip()

        # Value is not defined (empty cell).
        if stripped_value == "":
            self.logger.error(
                "Cell is empty. A response value is expected. "
                "Use 'NA' to indicate missing values.",
                extra=self._cell_context(value, col_index))
            return

        # 'NA' is an allowed value. No further validations apply.
        if stripped_value == 'NA':
            return

        # Remember whether the value carries a '>' or '<' prefix, then drop
        # the prefix (and any whitespace after it) before parsing the number.
        has_trunc_symbol = stripped_value.startswith(('>', '<'))
        number_text = re.sub(r"^[><]\s*", "", stripped_value)

        try:
            numeric_value = float(number_text)
        except ValueError:
            self.logger.error(
                "Value cannot be interpreted as a floating point number "
                "and is not a valid response value.",
                extra=self._cell_context(value, col_index))
            return

        # float() accepts spellings such as 'nan' and 'inf'; neither is a
        # usable response value.
        if math.isnan(numeric_value):
            self.logger.error(
                "Value is NaN, therefore, not a valid response value.",
                extra=self._cell_context(value, col_index))
            return

        if math.isinf(numeric_value):
            self.logger.error(
                "Value is infinite and, therefore, not a valid response value.",
                extra=self._cell_context(value, col_index))
            return

        # A whole number without a truncation prefix looks like a truncated
        # measurement that is missing its '>' / '<' indicator.
        if numeric_value % 1 == 0 and not has_trunc_symbol:
            self.logger.warning(
                "Value has no decimals and may represent an invalid response value.",
                extra=self._cell_context(value, col_index))

class GenericAssayWiseFileValidator(FeaturewiseFileValidator):
""" Generic assay file base validator
"""
Expand Down Expand Up @@ -4622,7 +4481,7 @@ def validate_data_relations(validators_by_meta_type, logger):
def request_from_portal_api(server_url, api_name, logger):
"""Send a request to the portal API and return the decoded JSON object."""

if api_name in ['info', 'genesets', 'gene-panels', 'treatments']:
if api_name in ['info', 'genesets', 'gene-panels']:
service_url = server_url + '/api/' + api_name + "?pageSize=9999999"

# TODO: change API for genes, gene aliases and cancer types to non-legacy
Expand Down Expand Up @@ -4730,15 +4589,6 @@ def extract_ids(json_data, id_key):
result_set.add(data_item[id_key])
return list(result_set)


def index_treatment_data(json_data,
                         id_field='treatmentId'):
    """Index treatment records by their identifier.

    json_data: iterable of records that support ``record[id_field]``
        (presumably the decoded JSON list returned by the portal's
        treatments API -- verify against the caller).
    id_field: key whose value becomes the dictionary key.

    Returns a dict mapping each record's ``id_field`` value to the record
    itself. When several records share an identifier, the last one wins.
    A record without ``id_field`` raises the lookup error of its type
    (``KeyError`` for dicts).
    """
    return {data_item[id_field]: data_item for data_item in json_data}

# there is no dump function implemented for the /info API. Unable to retrieve version.
def load_portal_metadata(json_data):
return json_data
Expand All @@ -4762,9 +4612,6 @@ def load_portal_info(path, logger, offline=False):
('genesaliases',
lambda json_data: transform_symbol_entrez_map(
json_data, 'gene_alias')),
('treatments',
lambda json_data: index_treatment_data(
json_data, 'treatmentId')),
('genesets',
lambda json_data: extract_ids(json_data, 'genesetId')),
('genesets_version',
Expand All @@ -4787,7 +4634,6 @@ def load_portal_info(path, logger, offline=False):
alias_entrez_map=portal_dict['genesaliases'],
gene_set_list=portal_dict['genesets'],
gene_panel_list=portal_dict['gene-panels'],
treatment_map = portal_dict['treatments'],
geneset_version = portal_dict['genesets_version'],
offline=offline)

Expand Down Expand Up @@ -4876,8 +4722,6 @@ def validate_study(study_dir, portal_instance, logger, relaxed_mode, strict_maf_
logger.warning('Skipping validations relating to gene set identifiers')
if portal_instance.gene_panel_list is None:
logger.warning('Skipping validations relating to gene panel identifiers')
if portal_instance.treatment_map is None:
logger.warning('Skipping validations relating to treatment identifiers')

# walk over the meta files in the dir and get properties of the study
(validators_by_meta_type,
Expand Down Expand Up @@ -5096,7 +4940,6 @@ def main_validate(args):
alias_entrez_map=None,
gene_set_list=None,
gene_panel_list=None,
treatment_map=None,
geneset_version =None)
elif args.portal_info_dir:
portal_instance = load_portal_info(args.portal_info_dir, logger,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@ public void testImportTreatmentData() throws Exception {
assertEquals("Name of Irinotecan", treatment1.getGenericEntityMetaProperties().get("NAME"));
assertEquals("Desc of Irinotecan", treatment1.getGenericEntityMetaProperties().get("DESCRIPTION"));
assertEquals("Url of Irinotecan", treatment1.getGenericEntityMetaProperties().get("URL"));

// TODO: test fields are updated after loading new treatment file
}

@Test
Expand All @@ -103,8 +101,6 @@ public void testImportGenericAssayData() throws Exception {
// Test whether fields were populated correctly
assertEquals("mean_1", genericAssayMeta1.getGenericEntityMetaProperties().get("name"));
assertEquals("mean_1", genericAssayMeta1.getGenericEntityMetaProperties().get("description"));

// TODO: test fields are updated after loading new generic assay meta file
}

private int getNumRecordsForGenericAssay() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,30 +386,16 @@ public void testLoadStudyEs0() throws Throwable {
// ...
String testTreatment = "Irinotecan";
assertNotNull(DaoGeneticEntity.getGeneticEntityByStableId(testTreatment));
// entity_stable_id NAME DESCRIPTION URL TCGA-A1-A0SB-01 TCGA-A1-A0SD-01
// ENTITY_STABLE_ID NAME DESCRIPTION URL TCGA-A1-A0SB-01 TCGA-A1-A0SD-01
// TCGA-A1-A0SE-01 TCGA-A1-A0SH-01 TCGA-A2-A04U-01 TCGA-B6-A0RS-01
// TCGA-BH-A0HP-01 TCGA-BH-A18P-01
// Irinotecan ... ... ... NA 0.080764666 NA 0.06704437 0.069568723 0.034992039
// 0.740817904 0.209220141


// GenericAssayService treatmentDataService = applicationContext.getBean(GenericAssayService.class);
// List<GenericAssayData> treatmentData = genericAssayService.getGenericAssayDataInMultipleMolecularProfiles(Arrays.asList(testMutationalSignatureMolecularProfileIds),
// Arrays.asList("TCGA-A1-A0SB-01", "TCGA-A1-A0SH-01"), Arrays.asList(testMutationalSignatureStableIds), PersistenceConstants.SUMMARY_PROJECTION);
// assertEquals(2, mutationalSignatureData.size());
// assertEquals("18", mutationalSignatureData.get(0).getValue());
// assertEquals("13", mutationalSignatureData.get(1).getValue());

// TreatmentDataService treatmentDataService = applicationContext.getBean(TreatmentDataService.class);
// List<TreatmentMolecularData> treatmentData = treatmentDataService
// .fetchTreatmentData("study_es_0_treatment_ic50", "study_es_0_all", Arrays.asList(testTreatment));
// assertEquals(8, treatmentData.size());

// treatmentData = treatmentDataService.fetchTreatmentData("study_es_0_treatment_ic50",
// Arrays.asList("TCGA-A1-A0SB-01", "TCGA-A1-A0SH-01"), Arrays.asList(testTreatment));
// assertEquals(2, treatmentData.size());
// assertEquals("NA", treatmentData.get(0).getValue());
// assertEquals(0.06704437, Double.parseDouble(treatmentData.get(1).getValue()), 0.00001);
GenericAssayService treatmentDataService = applicationContext.getBean(GenericAssayService.class);
List<GenericAssayData> treatmentData = treatmentDataService.getGenericAssayData("study_es_0_treatment_ic50", "study_es_0_all", Arrays.asList(testTreatment), PersistenceConstants.SUMMARY_PROJECTION);
assertEquals(8, treatmentData.size());
assertEquals("NA", treatmentData.get(0).getValue());
assertEquals(0.080764666, Double.parseDouble(treatmentData.get(1).getValue()), 0.00001);

// ===== check study status
assertEquals(DaoCancerStudy.Status.AVAILABLE, DaoCancerStudy.getStatus("study_es_0"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ profile_name: IC50 values of compounds on cellular phenotype readout
profile_description: IC50 (compound concentration resulting in half maximal inhibition) of compounds on cellular phenotype readout of cultured mutant cell lines.
data_filename: data_treatment_ic50.txt
show_profile_in_analysis_tab: true
value_sort_order: asc
value_sort_order: asc
generic_entity_meta_properties: NAME,DESCRIPTION,URL
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ profile_name: IC50 values of compounds on cellular phenotype readout
profile_description: IC50 (compound concentration resulting in half maximal inhibition) of compounds on cellular phenotype readout of cultured mutant cell lines.
data_filename: data_treatment_ic50.txt
show_profile_in_analysis_tab: true
value_sort_order: asc
value_sort_order: asc
generic_entity_meta_properties: NAME,DESCRIPTION,URL
2 changes: 1 addition & 1 deletion core/src/test/resources/treatments/data_treatment_ic50.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
entity_stable_id NAME DESCRIPTION URL
ENTITY_STABLE_ID NAME DESCRIPTION URL
17-AAG Name of 17-AAG Desc of 17-AAG Url of 17-AAG
AEW541 Name of AEW541 Desc of AEW541 Url of AEW541
AZD0530 Name of AZD0530 Desc of AZD0530 Url of AZD0530
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
entity_stable_id NAME DESCRIPTION URL
ENTITY_STABLE_ID NAME DESCRIPTION URL
17-AAG Name of 17-AAG New New desc of 17-AAG Url of 17-AAG
AEW541 Name of AEW541 New desc of AEW541 Url of AEW541
AZD0530 Name of AZD0530 New desc of AZD0530 Url of AZD0530
Expand Down
4 changes: 2 additions & 2 deletions core/src/test/scripts/system_tests_validate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,13 +247,13 @@ def test_files_with_quotes(self):
def _resetClassVars():
"""Reset the state of classes that check multiple files of the same type.
GsvaWiseFileValidator and TreatmentWiseFileValidator classes check
GsvaWiseFileValidator classes check
consistency between multiple data files by collecting information in class variables.
This implementation is not consistent with the unit test environment that simulates
different studies to be loaded. To ensure real-world functionality the class variables
should be reset before each unit test that tests multi file consistency."""

for c in [ validateData.TreatmentWiseFileValidator, validateData.GsvaWiseFileValidator ]:
for c in [ validateData.GsvaWiseFileValidator ]:
c.prior_validated_sample_ids = None
c.prior_validated_feature_ids = None
c.prior_validated_header = None
Expand Down

This file was deleted.

This file was deleted.

Loading

0 comments on commit 297df89

Please sign in to comment.