diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayEntity.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayEntity.java
index 9754e8b49f7..ad5372e7306 100644
--- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayEntity.java
+++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayEntity.java
@@ -237,13 +237,16 @@ public static void importData(File dataFile, GeneticAlterationType geneticAltera
                 currentLine = buf.readLine();
             }
 
-            // show import message
-            if (updateInfo) {
-                ProgressMonitor.setCurrentMessage(updatedEntities.size() + " generic entities existing in the database that were overridden during import.");
-            } else {
-                ProgressMonitor.setCurrentMessage(notUpdatedEntities.size() + " generic entities existing in the database that were not overridden during import.");
+            // show import result message
+            if (updatedEntities.size() > 0) {
+                ProgressMonitor.setCurrentMessage("--> Entities updated: " + updatedEntities.size() + " generic entities existing in the database that were overridden during import.");
+            }
+            if (notUpdatedEntities.size() > 0) {
+                ProgressMonitor.setCurrentMessage("--> Entities not updated: " + notUpdatedEntities.size() + " generic entities existing in the database that were not overridden during import.");
+            }
+            if (newEntities.size() > 0) {
+                ProgressMonitor.setCurrentMessage("--> New Entities: " + newEntities.size() + " generic entities have been imported into database during import.");
             }
-            ProgressMonitor.setCurrentMessage(newEntities.size() + " generic entities have been imported into database during import.");
             reader.close();
 
 
diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java
index 42c93200e6d..1c9a8c6f220 100644
--- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java
+++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java
@@ -56,9 +56,14 @@ public void run() {
             // Parse arguments
             // using a real options parser, helps avoid bugs
             String description = "Import 'profile' files that contain data matrices indexed by gene, case";
-            OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);
+            OptionSet options = ConsoleUtil.parseStandardDataAndMetaUpdateOptions(args, description, true);
             File dataFile = new File((String) options.valueOf("data"));
             File descriptorFile = new File((String) options.valueOf( "meta" ) );
+            // Update existing entities by default; only an explicit "false" (any case) or "0" turns it off.
+            // valueOf() yields null when --update-info is passed without a value, so compare null-safely.
+            String updateInfoArg = options.has("update-info") ? (String) options.valueOf("update-info") : null;
+            boolean updateInfo = !("false".equalsIgnoreCase(updateInfoArg)
+                    || "0".equals(updateInfoArg));
             SpringUtil.initDataSource();
             ProgressMonitor.setCurrentMessage("Reading data from: " + dataFile.getAbsolutePath());
             // Load genetic profile and gene panel
@@ -100,7 +105,7 @@ public void run() {
                 importer.importData();
             } else if (geneticProfile.getGeneticAlterationType() == GeneticAlterationType.GENERIC_ASSAY) {
                 // add all missing `genetic_entities` for this assay to the database
-                ImportGenericAssayEntity.importData(dataFile, geneticProfile.getGeneticAlterationType(), geneticProfile.getOtherMetaDataField("generic_entity_meta_properties"), true);
+                ImportGenericAssayEntity.importData(dataFile, geneticProfile.getGeneticAlterationType(), geneticProfile.getOtherMetaDataField("generic_entity_meta_properties"), updateInfo);
 
                 ImportTabDelimData genericAssayProfileImporter = new ImportTabDelimData(dataFile, geneticProfile.getTargetLine(), geneticProfile.getGeneticProfileId(), genePanel, geneticProfile.getOtherMetaDataField("generic_entity_meta_properties"));
                 genericAssayProfileImporter.importData(numLines);
diff --git a/core/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
b/core/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
index a4fccccd0c4..b5b362270ea 100644
--- a/core/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
+++ b/core/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
@@ -226,6 +226,74 @@ public static OptionSet parseStandardDataAndStudyOptions(String[] args, String d
                     "Error: 'study' argument required.");
         }
 
+        return options;
+    }
+
+    /**
+     * Parses the command line of an Importer class whose main method requires the 'data' and 'meta' options
+     * and accepts an optional 'update-info' option; when hasLoadMode is true, 'loadMode' is required as well
+     *
+     * @param args: the same args given to main() method of the tool
+     * @param description: short description of the tool (to display in the usage line if necessary)
+     * @param hasLoadMode: set to true to accept and require 'loadMode' (directLoad or bulkLoad) and switch MySQLbulkLoader accordingly
+     *
+     * @return the parsed options
+     */
+    public static OptionSet parseStandardDataAndMetaUpdateOptions(String[] args, String description, boolean hasLoadMode) {
+        // using a real options parser, helps avoid bugs
+        OptionParser parser = new OptionParser();
+        parser.accepts("noprogress", "this option can be given to avoid the messages regarding memory usage and % complete");
+        OptionSpec help = parser.accepts( "help", "print this help info" );
+        parser.accepts( "data", "profile data file" ).withRequiredArg().describedAs( "data_file.txt" ).ofType( String.class );
+        parser.accepts( "update-info", "Update information for existing entities in the database").withOptionalArg().ofType(String.class);
+        parser.accepts( "meta", "meta (description) file" ).withRequiredArg().describedAs( "meta_file.txt" ).ofType( String.class );
+        if (hasLoadMode) {
+            parser.accepts( "loadMode", "direct (per record) or bulk load of data" )
+                .withRequiredArg().describedAs( "[directLoad|bulkLoad (default)]" ).ofType( String.class );
+        }
+        String progName = "importScript";
+
+        OptionSet options = null;
+        try {
+            options = parser.parse( args );
+        } catch (OptionException e) {
+            throw new UsageException(progName, description, parser,
+                    e.getMessage());
+        }
+
+        if( options.has( help ) ){
+            throw new UsageException(progName, description, parser);
+        }
+
+        // these extra checks are needed, since withRequiredArg above only indicates that the option
+        // takes a mandatory argument; it does not make the option itself mandatory.
+        if(!options.has("data")) {
+            throw new UsageException(progName, description, parser,
+                    "Error: 'data' argument required.");
+        }
+
+        if(!options.has("meta")) {
+            throw new UsageException(progName, description, parser,
+                    "Error: 'meta' argument required.");
+        }
+
+        if (hasLoadMode) {
+            if( options.has( "loadMode" ) ){
+                String actionArg = (String) options.valueOf( "loadMode" );
+                if (actionArg.equalsIgnoreCase("directLoad")) {
+                    MySQLbulkLoader.bulkLoadOff();
+                } else if (actionArg.equalsIgnoreCase( "bulkLoad" )) {
+                    MySQLbulkLoader.bulkLoadOn();
+                } else {
+                    throw new UsageException(progName, description, parser,
+                            "Error: unknown loadMode action: " + actionArg);
+                }
+            }
+            else {
+                throw new UsageException(progName, description, parser,
+                        "Error: 'loadMode' argument required.");
+            }
+        }
         return options;
     }
 }
diff --git a/core/src/main/scripts/importer/cbioportalImporter.py b/core/src/main/scripts/importer/cbioportalImporter.py
index c9202e9488f..14d4572d4d6 100755
--- a/core/src/main/scripts/importer/cbioportalImporter.py
+++ b/core/src/main/scripts/importer/cbioportalImporter.py
@@ -102,7 +102,7 @@ def remove_study_id(jvm_args, study_id):
     run_java(*args)
 
 
-def import_study_data(jvm_args, meta_filename, data_filename, meta_file_dictionary = None):
+def import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, meta_file_dictionary = None):
     args = jvm_args.split(' ')
 
     # In case the meta file is already parsed in a previous function, it is not
@@ -114,6 +114,11 @@ def import_study_data(jvm_args, meta_filename, data_filename, meta_file_dictiona
     #
Retrieve meta file type
     meta_file_type = meta_file_dictionary['meta_file_type']
 
+    # Update entities by default; only an explicit "False" (any letter case) turns updating off
+    shouldUpdateGenericAssayEntities = True
+    if update_generic_assay_entity is not None and update_generic_assay_entity.casefold() == "False".casefold():
+        shouldUpdateGenericAssayEntities = False
+
     # invalid file, skip
     if meta_file_type is None:
         print(("Unrecognized meta file type '%s', skipping file"
@@ -133,6 +138,12 @@ def import_study_data(jvm_args, meta_filename, data_filename, meta_file_dictiona
         args.append(meta_filename)
         args.append("--loadMode")
         args.append("bulkload")
+    if importer == "org.mskcc.cbio.portal.scripts.ImportProfileData" and shouldUpdateGenericAssayEntities:
+        args.append("--update-info")
+        args.append("True")
+    elif importer == "org.mskcc.cbio.portal.scripts.ImportProfileData" and not shouldUpdateGenericAssayEntities:
+        args.append("--update-info")
+        args.append("False")
     if importer in ("org.mskcc.cbio.portal.scripts.ImportMutSigData", "org.mskcc.cbio.portal.scripts.ImportGisticData"):
         args.append("--data")
         args.append(data_filename)
@@ -186,7 +197,7 @@ def process_case_lists(jvm_args, case_list_dir):
         if not (case_list.startswith('.') or case_list.endswith('~')):
             import_case_list(jvm_args, os.path.join(case_list_dir, case_list))
 
-def process_command(jvm_args, command, meta_filename, data_filename, study_ids):
+def process_command(jvm_args, command, meta_filename, data_filename, study_ids, update_generic_assay_entity = None):
     if command == IMPORT_CANCER_TYPE:
         import_cancer_type(jvm_args, data_filename)
     elif command == IMPORT_STUDY:
@@ -201,11 +212,11 @@ def process_command(jvm_args, command, meta_filename, data_filename, study_ids):
         else:
             raise RuntimeError('Your command uses both -id and -meta. Please, use only one of the two parameters.')
     elif command == IMPORT_STUDY_DATA:
-        import_study_data(jvm_args, meta_filename, data_filename)
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity)
     elif command == IMPORT_CASE_LIST:
         import_case_list(jvm_args, meta_filename)
 
-def process_directory(jvm_args, study_directory):
+def process_directory(jvm_args, study_directory, update_generic_assay_entity = None):
     """
     Import an entire study directory based on meta files found.
 
@@ -338,47 +349,47 @@ def process_directory(jvm_args, study_directory):
         raise RuntimeError('No sample attribute file found')
     else:
         meta_filename, data_filename = sample_attr_filepair
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
     # Next, we need to import resource definitions for resource data
     if resource_definition_filepair is not None:
         meta_filename, data_filename = resource_definition_filepair
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
     # Next, we need to import sample definitions for resource data
     if sample_resource_filepair is not None:
         meta_filename, data_filename = sample_resource_filepair
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
     # Next, import everything else except gene panel, fusion data, GSVA and
     # z-score expression. If in the future more types refer to each other, (like
     # in a tree structure) this could be programmed in a recursive fashion.
     for meta_filename, data_filename in regular_filepairs:
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
     # Import fusion data (after mutation)
     if fusion_filepair is not None:
         meta_filename, data_filename = fusion_filepair
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
     # Import expression z-score (after expression)
     for meta_filename, data_filename in zscore_filepairs:
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
     # Import GSVA genetic profiles (after expression and z-scores)
     if gsva_score_filepair is not None:
 
         # First import the GSVA score data
         meta_filename, data_filename = gsva_score_filepair
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
         # Second import the GSVA p-value data
         meta_filename, data_filename = gsva_pvalue_filepair
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
     if gene_panel_matrix_filepair is not None:
         meta_filename, data_filename = gene_panel_matrix_filepair
-        import_study_data(jvm_args, meta_filename, data_filename, study_meta_dictionary[meta_filename])
+        import_study_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity, study_meta_dictionary[meta_filename])
 
     # Import the case lists
     case_list_dirname = os.path.join(study_directory, 'case_lists')
@@ -456,6 +467,8 @@ def interface():
 
     parser.add_argument('-id', '--study_ids', type=str, required=False,
                         help='Cancer Study IDs for `remove-study` command, comma separated')
+    parser.add_argument('-update', '--update_generic_assay_entity', type=str, required=False,
+                        help='Set as True to update the existing generic assay entities, set as False to keep the existing generic assay entities for generic assay')
     # TODO - add same argument to metaimporter
     # TODO - harmonize on - and _
 
@@ -517,11 +530,11 @@ def main(args):
 
     if study_directory != None:
         check_dir(study_directory)
-        process_directory(jvm_args, study_directory)
+        process_directory(jvm_args, study_directory, args.update_generic_assay_entity)
     else:
         check_args(args.command)
         check_files(args.meta_filename, args.data_filename)
-        process_command(jvm_args, args.command, args.meta_filename, args.data_filename, args.study_ids)
+        process_command(jvm_args, args.command, args.meta_filename, args.data_filename, args.study_ids, args.update_generic_assay_entity)
 
 # ------------------------------------------------------------------------------
 # ready to roll
diff --git a/core/src/main/scripts/importer/metaImport.py b/core/src/main/scripts/importer/metaImport.py
index afdf5c0bcef..565d8d396d8 100755
--- a/core/src/main/scripts/importer/metaImport.py
+++ b/core/src/main/scripts/importer/metaImport.py
@@ -105,6 +105,8 @@ def interface():
                              'report. For example, set this to a high number to '
                              'report all genes that could not be loaded, instead '
                              'of reporting "(GeneA, GeneB, GeneC, 213 more)".')
+    parser.add_argument('-update', '--update_generic_assay_entity', type=str, required=False, default="True",
+                        help='Set as True to update the existing generic assay entities, set as False to keep the existing generic assay entities for generic assay')
     parser = parser.parse_args()
     return parser
 