Skip to content

Commit

Permalink
Cobalt: optionally restrict GC profile entries by min and max GC ratio via config 'gc_ratio_file_min' and 'gc_ratio_file_max'
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesshale committed Dec 9, 2024
1 parent e3bfcb6 commit 413a66f
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,27 @@ public Table loadGCContent(ChromosomePositionCodec chromosomePosCodec) throws IO
// Adds one table row per GC profile entry that passes the optional GC ratio filters.
for(GCProfile gcProfile : gcProfileList)
{
    double gcContent = gcProfile.gcContent();

    // Lower bound is only applied when a positive minimum is configured; entries whose GC content
    // is not positive are exempt from it.
    if(gcContent > 0 && mConfig.GcRatioFileMin > 0 && gcContent < mConfig.GcRatioFileMin)
        continue;

    // Upper bound is only applied when the configured maximum is below 1.
    if(mConfig.GcRatioFileMax < 1 && gcContent > mConfig.GcRatioFileMax)
        continue;

    long chrPosIndex = chromosomePosCodec.encodeChromosomePosition(gcProfile.chromosome(), gcProfile.start());

    // A non-positive encoded position is treated as an unrecognised chromosome.
    if(chrPosIndex <= 0)
    {
        throw new RuntimeException("Unknown chromosome: " + gcProfile.chromosome());
    }

    // Append the row only once the entry has been accepted, so that filtered entries do not
    // leave behind an appended row whose columns were never set.
    Row row = gcProfileTable.appendRow();

    row.setLong(CobaltColumns.ENCODED_CHROMOSOME_POS, chrPosIndex);
    row.setDouble(CobaltColumns.GC_CONTENT, gcContent);
    row.setBoolean(CobaltColumns.IS_MAPPABLE, gcProfile.isMappable());
    row.setBoolean(CobaltColumns.IS_AUTOSOME, HumanChromosome.fromString(gcProfile.chromosome()).isAutosome());
}
Expand Down
17 changes: 15 additions & 2 deletions cobalt/src/main/java/com/hartwig/hmftools/cobalt/CobaltConfig.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.hartwig.hmftools.cobalt;

import static com.hartwig.hmftools.cobalt.CobaltConstants.DEFAULT_GC_RATIO_FILE_MAX;
import static com.hartwig.hmftools.cobalt.CobaltConstants.DEFAULT_GC_RATIO_FILE_MIN;
import static com.hartwig.hmftools.cobalt.CobaltConstants.DEFAULT_MIN_MAPPING_QUALITY;
import static com.hartwig.hmftools.cobalt.CobaltConstants.DEFAULT_PCF_GAMMA;
import static com.hartwig.hmftools.common.genome.gc.GCProfileFactory.GC_PROFILE;
Expand Down Expand Up @@ -45,6 +47,8 @@ public enum Mode
private static final String TARGET_REGION_NORM_FILE = "target_region";
private static final String INCLUDE_DUPLICATES = "include_duplicates";

private static final String GC_RATIO_FILE_MIN = "gc_ratio_file_min";
private static final String GC_RATIO_FILE_MAX = "gc_ratio_file_max";

public final String ReferenceId;
public final String ReferenceBamPath;
Expand All @@ -64,6 +68,9 @@ public enum Mode
public final ValidationStringency BamStringency;
public final boolean IncludeDuplicates;

public final double GcRatioFileMin;
public final double GcRatioFileMax;

public final String TumorOnlyDiploidBed;
public final String TargetRegionPath;

Expand All @@ -82,15 +89,17 @@ public CobaltConfig(final ConfigBuilder configBuilder)
TumorOnlyDiploidBed = configBuilder.getValue(TUMOR_ONLY_DIPLOID_BED);
TargetRegionPath = configBuilder.getValue(TARGET_REGION_NORM_FILE);
RefGenomePath = configBuilder.getValue(REF_GENOME);


GcRatioFileMin = configBuilder.getDecimal(GC_RATIO_FILE_MIN);
GcRatioFileMax = configBuilder.getDecimal(GC_RATIO_FILE_MAX);

MinMappingQuality = configBuilder.getInteger(MIN_MAPPING_QUALITY);
PcfGamma = configBuilder.getInteger(PCF_GAMMA);
IncludeDuplicates = configBuilder.hasFlag(INCLUDE_DUPLICATES);

BamStringency = BamUtils.validationStringency(configBuilder);
OutputDir = parseOutputDir(configBuilder);
Threads = parseThreads(configBuilder);

}

public static void registerConfig(final ConfigBuilder configBuilder)
Expand All @@ -103,6 +112,10 @@ public static void registerConfig(final ConfigBuilder configBuilder)

configBuilder.addPath(GC_PROFILE, true, GC_PROFILE_DESC);
configBuilder.addPath(REF_GENOME, false, REF_GENOME_CFG_DESC + ", required when using CRAM files");

configBuilder.addDecimal(GC_RATIO_FILE_MIN, "Restrict GC profile entries to above minimum", DEFAULT_GC_RATIO_FILE_MIN);
configBuilder.addDecimal(GC_RATIO_FILE_MAX, "Restrict GC profile entries to below maximum", DEFAULT_GC_RATIO_FILE_MAX);

configBuilder.addPath(TUMOR_ONLY_DIPLOID_BED, false, "Diploid regions for tumor-only mode");
configBuilder.addPath(TARGET_REGION_NORM_FILE, false, "Targeted regions normalisation file");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ public class CobaltConstants
public static final String APP_NAME = "Cobalt";

public static final int INVALID_VALUE_INDICATOR = -1;
public static final double MIN_DIPLOID = 0.85;
public static final double MAX_DIPLOID = 1.15;

public static final double DEFAULT_GC_RATIO_FILE_MIN = 0;
public static final double DEFAULT_GC_RATIO_FILE_MAX = 1;

public static final int WINDOW_SIZE = 1000;
public static final int PARTITION_SIZE = 100_000_000;
public static final int OFF_TARGET_WINDOW_SIZE = 1_000_000;
Expand Down

0 comments on commit 413a66f

Please sign in to comment.