Skip to content

Commit

Permalink
changed IS_GERMLINE to SV_STATUS
Browse files Browse the repository at this point in the history
  • Loading branch information
mandawilson committed Feb 9, 2022
1 parent e55e602 commit 4fe21b3
Show file tree
Hide file tree
Showing 18 changed files with 143 additions and 140 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public static void addStructuralVariantToBulkLoader(StructuralVariant structural
"LENGTH",
"COMMENTS",
"EXTERNAL_ANNOTATION",
"IS_GERMLINE",
"SV_STATUS",
};
bl.setFieldNames(fieldNames);

Expand All @@ -90,13 +90,13 @@ public static void addStructuralVariantToBulkLoader(StructuralVariant structural
Long.toString(structuralVariant.getInternalId()),
Integer.toString(structuralVariant.getGeneticProfileId()),
Integer.toString(structuralVariant.getSampleIdInternal()),
Long.toString(structuralVariant.getSite1EntrezGeneId()),
structuralVariant.getSite1EntrezGeneId() == null ? null : Long.toString(structuralVariant.getSite1EntrezGeneId()),
structuralVariant.getSite1EnsemblTranscriptId(),
Integer.toString(structuralVariant.getSite1Exon()),
structuralVariant.getSite1Chromosome(),
Integer.toString(structuralVariant.getSite1Position()),
structuralVariant.getSite1Description(),
Long.toString(structuralVariant.getSite2EntrezGeneId()),
structuralVariant.getSite2EntrezGeneId() == null ? null : Long.toString(structuralVariant.getSite2EntrezGeneId()),
structuralVariant.getSite2EnsemblTranscriptId(),
Integer.toString(structuralVariant.getSite2Exon()),
structuralVariant.getSite2Chromosome(),
Expand All @@ -123,8 +123,7 @@ public static void addStructuralVariantToBulkLoader(StructuralVariant structural
Integer.toString(structuralVariant.getLength()),
structuralVariant.getComments(),
structuralVariant.getExternalAnnotation(),
Integer.toString(structuralVariant.getIsGermline()?1:0));
//TODO: fix this ... the value must be converted to an integer
structuralVariant.getSvStatus());

if ((structuralVariant.getDriverFilter() != null
&& !structuralVariant.getDriverFilter().isEmpty()
Expand Down Expand Up @@ -240,7 +239,7 @@ private static StructuralVariant extractStructuralVariant(ResultSet rs) throws S
structuralVariant.setDriverFilterAnn(rs.getString("DRIVER_FILTER_ANNOTATION"));
structuralVariant.setDriverTiersFilter(rs.getString("DRIVER_TIERS_FILTER"));
structuralVariant.setDriverTiersFilterAnn(rs.getString("DRIVER_TIERS_FILTER_ANNOTATION"));
structuralVariant.setIsGermline(rs.getBoolean("IS_GERMLINE"));
structuralVariant.setSvStatus(rs.getString("SV_STATUS"));
return structuralVariant;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public class StructuralVariant {
private String driverFilterAnn;
private String driverTiersFilter;
private String driverTiersFilterAnn;
private Boolean isGermline;
private String svStatus;

public long getInternalId() {
return internalId;
Expand Down Expand Up @@ -335,10 +335,10 @@ public String getDriverTiersFilterAnn() {
/**
 * Stores the given value in the {@code driverTiersFilterAnn} field.
 *
 * @param driverTiersFilterAnn the value to assign; stored as-is with no validation
 */
public void setDriverTiersFilterAnn(String driverTiersFilterAnn) {
this.driverTiersFilterAnn = driverTiersFilterAnn;
}
public Boolean getIsGermline() {
return isGermline;
/**
 * Returns the value currently held in the {@code svStatus} field.
 *
 * @return the stored status string, or {@code null} if none has been set
 */
public String getSvStatus() {
    return this.svStatus;
}
public void setIsGermline(Boolean isGermline) {
this.isGermline = isGermline;
/**
 * Stores the given value in the {@code svStatus} field.
 *
 * @param svStatus the status string to assign; stored as-is with no validation,
 *                 and {@code null} is accepted
 */
public void setSvStatus(String svStatus) {
this.svStatus = svStatus;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,22 +111,23 @@ public void importData() throws IOException, DaoException {
CanonicalGene site2CanonicalGene = setCanonicalGene(site2EntrezGeneId, site2HugoSymbol, daoGene);

// If neither of the genes is recognized, skip the line
if(site1CanonicalGene == null) {
ProgressMonitor.logWarning("Gene not found: " + site1HugoSymbol + " ["
+ site1EntrezGeneId + "]. Ignoring it "
+ "and all fusion data associated with it!");
} else if (site2CanonicalGene == null) {
ProgressMonitor.logWarning("Gene not found: " + site2HugoSymbol + " ["
if(site1CanonicalGene == null && site2CanonicalGene == null) {
ProgressMonitor.logWarning("Could not find gene 1: " + site1HugoSymbol + " [" + site1EntrezGeneId
+ "] or gene 2: " + site2HugoSymbol + " ["
+ site2EntrezGeneId + "]. Ignoring it "
+ "and all fusion data associated with it!");
// If both genes are recognized, continue
+ "and all SV data associated with it!");
// If at least one gene is recognized, continue
} else {
// Save the Entrez Gene Id if it was not saved before
if (site1EntrezGeneId == TabDelimitedFileUtil.NA_LONG) {
if (site1EntrezGeneId == TabDelimitedFileUtil.NA_LONG && site1CanonicalGene != null) {
structuralVariant.setSite1EntrezGeneId(site1CanonicalGene.getEntrezGeneId());
} else if (site1EntrezGeneId == TabDelimitedFileUtil.NA_LONG) {
structuralVariant.setSite1EntrezGeneId(null); // we want this to be null in the database, not NA_LONG
}
if (site2EntrezGeneId == TabDelimitedFileUtil.NA_LONG) {
if (site2EntrezGeneId == TabDelimitedFileUtil.NA_LONG && site2CanonicalGene != null) {
structuralVariant.setSite2EntrezGeneId(site2CanonicalGene.getEntrezGeneId());
} else if (site2EntrezGeneId == TabDelimitedFileUtil.NA_LONG) {
structuralVariant.setSite2EntrezGeneId(null); // we want this to be null in the database, not NA_LONG
}
// Add structural variant
DaoStructuralVariant.addStructuralVariantToBulkLoader(structuralVariant);
Expand Down Expand Up @@ -160,7 +161,7 @@ private CanonicalGene setCanonicalGene(long siteEntrezGeneId, String siteHugoSym
}

// If no gene can be found based on Entrez Gene ID, try Symbol.
if (siteCanonicalGene == null) {
if (siteCanonicalGene == null && !TabDelimitedFileUtil.NA_STRING.equals(siteHugoSymbol)) {
siteCanonicalGene = daoGene.getNonAmbiguousGene(siteHugoSymbol, true);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ public class StructuralVariantUtil {
public static final String DRIVER_FILTER_ANNOTATION = "cbp_driver_annotation";
public static final String DRIVER_TIERS_FILTER = "cbp_driver_tiers";
public static final String DRIVER_TIERS_FILTER_ANNOTATION = "cbp_driver_tiers_annotation";
public static final String IS_GERMLINE = "is_germline";

/** Column name used to look up the SV status value of a record via {@code getColumnIndex}. */
public static final String SV_STATUS = "Sv_Status";
/** No-argument constructor with an empty body. */
public StructuralVariantUtil(){}

public StructuralVariantUtil(String line) {
Expand Down Expand Up @@ -140,7 +140,10 @@ public StructuralVariant parseStructuralVariantRecord(String[] parts) {
structuralVariant.setDriverFilterAnn(TabDelimitedFileUtil.getPartString(getColumnIndex(StructuralVariantUtil.DRIVER_FILTER_ANNOTATION), parts));
structuralVariant.setDriverTiersFilter(TabDelimitedFileUtil.getPartString(getColumnIndex(StructuralVariantUtil.DRIVER_TIERS_FILTER), parts));
structuralVariant.setDriverTiersFilterAnn(TabDelimitedFileUtil.getPartString(getColumnIndex(StructuralVariantUtil.DRIVER_TIERS_FILTER_ANNOTATION), parts));
structuralVariant.setIsGermline(TabDelimitedFileUtil.getPartInt(getColumnIndex(StructuralVariantUtil.IS_GERMLINE), parts) != 0);
structuralVariant.setSvStatus(TabDelimitedFileUtil.getPartString(getColumnIndex(StructuralVariantUtil.SV_STATUS), parts));
if (TabDelimitedFileUtil.NA_STRING.equals(structuralVariant.getSvStatus())) {
structuralVariant.setSvStatus(null); // we want to use the database default
}
return structuralVariant;
}

Expand All @@ -162,7 +165,7 @@ public int getColumnIndex(String colName) {
* If a structural variant record has a mix of defined and missing values for Site 1 or Site 2
* Ensembl transcript IDs and/or exons then the structural variant record will not be imported.
*
* Example (assuming that site 1 and site 2 hugo symbol and/or entrez id are present):
* Example (assuming that site 1 or site 2 hugo symbol and/or entrez id are present):
*
* Valid Record:
* Site 1 Transcript: EST0000024958
Expand Down Expand Up @@ -195,8 +198,7 @@ public Boolean hasRequiredStructuralVariantFields(StructuralVariant record) {
record.getSite1Exon() != -1 &&
record.getSite2Exon() != -1);
return ( hasNoEnsemblExonValues || hasAllEnsemblExonValues ) &&
(record.getSite1EntrezGeneId() != Long.MIN_VALUE || !record.getSite1HugoSymbol().equalsIgnoreCase(TabDelimitedFileUtil.NA_STRING)) &&
(record.getSite2EntrezGeneId() != Long.MIN_VALUE || !record.getSite2HugoSymbol().equalsIgnoreCase(TabDelimitedFileUtil.NA_STRING));
(record.getSite1EntrezGeneId() != Long.MIN_VALUE || !record.getSite1HugoSymbol().equalsIgnoreCase(TabDelimitedFileUtil.NA_STRING) || record.getSite2EntrezGeneId() != Long.MIN_VALUE || !record.getSite2HugoSymbol().equalsIgnoreCase(TabDelimitedFileUtil.NA_STRING));
}

}
6 changes: 3 additions & 3 deletions core/src/test/resources/data_structural_variants.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Sample_ID Site1_Entrez_Gene_Id Site1_Hugo_Symbol Site1_Ensembl_Transcript_Id Site1_Exon Site1_Chromosome Site1_Position Site1_Description Site2_Entrez_Gene_Id Site2_Hugo_Symbol Site2_Ensembl_Transcript_Id Site2_Exon Site2_Chromosome Site2_Position Site2_Description Site2_Effect_On_Frame NCBI_Build DNA_Support RNA_Support Normal_Read_Count Tumor_Read_Count Normal_Variant_Count Tumor_Variant_Count Normal_Paired_End_Read_Count Tumor_Paired_End_Read_Count Normal_Split_Read_Count Tumor_Split_Read_Count Annotation Breakpoint_Type Center Connection_Type Event_Info Class Length Comments External_Annotation cbp_driver cbp_driver_annotation cbp_driver_tiers cbp_driver_tiers_annotation is_germline
TCGA-A1-A0SB-01 NA KIAA1549 ENST00000242365 15 7 138536968 KIAA1549-BRAF.K16B10.COSF509_1 NA BRAF ENST00000288602 10 7 140482957 KIAA1549-BRAF.K16B10.COSF509_2 NA GRCh37 no yes NA 1000 NA 900 NA NA NA NA KIAA1549-BRAF.K16B10.COSF509 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF509 NA NA NA NA 0
TCGA-A1-A0SD-01 NA EML4 ENST00000318522 13 2 42522656 EML4-ALK.E13A20.AB462411_1 NA ALK ENST00000389048 20 2 29446335 EML4-ALK.E13A20.AB462411_2 NA GRCh37 no yes NA 1006 NA 300 NA NA NA NA EML4-ALK.E13A20 NA NA NA Fusion NA NA Gain-of-Function GENBANK:AB462411 NA NA NA NA 0
Sample_ID Site1_Entrez_Gene_Id Site1_Hugo_Symbol Site1_Ensembl_Transcript_Id Site1_Exon Site1_Chromosome Site1_Position Site1_Description Site2_Entrez_Gene_Id Site2_Hugo_Symbol Site2_Ensembl_Transcript_Id Site2_Exon Site2_Chromosome Site2_Position Site2_Description Site2_Effect_On_Frame NCBI_Build DNA_Support RNA_Support Normal_Read_Count Tumor_Read_Count Normal_Variant_Count Tumor_Variant_Count Normal_Paired_End_Read_Count Tumor_Paired_End_Read_Count Normal_Split_Read_Count Tumor_Split_Read_Count Annotation Breakpoint_Type Center Connection_Type Event_Info Class Length Comments External_Annotation cbp_driver cbp_driver_annotation cbp_driver_tiers cbp_driver_tiers_annotation Sv_Status
TCGA-A1-A0SB-01 NA KIAA1549 ENST00000242365 15 7 138536968 KIAA1549-BRAF.K16B10.COSF509_1 NA BRAF ENST00000288602 10 7 140482957 KIAA1549-BRAF.K16B10.COSF509_2 NA GRCh37 no yes NA 1000 NA 900 NA NA NA NA KIAA1549-BRAF.K16B10.COSF509 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF509 NA NA NA NA SOMATIC
TCGA-A1-A0SD-01 NA EML4 ENST00000318522 13 2 42522656 EML4-ALK.E13A20.AB462411_1 NA ALK ENST00000389048 20 2 29446335 EML4-ALK.E13A20.AB462411_2 NA GRCh37 no yes NA 1006 NA 300 NA NA NA NA EML4-ALK.E13A20 NA NA NA Fusion NA NA Gain-of-Function GENBANK:AB462411 NA NA NA NA SOMATIC
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Sample_ID Site1_Entrez_Gene_Id Site1_Hugo_Symbol Site1_Ensembl_Transcript_Id Site1_Exon Site1_Chromosome Site1_Position Site1_Description Site2_Entrez_Gene_Id Site2_Hugo_Symbol Site2_Ensembl_Transcript_Id Site2_Exon Site2_Chromosome Site2_Position Site2_Description Site2_Effect_On_Frame NCBI_Build DNA_Support RNA_Support Normal_Read_Count Tumor_Read_Count Normal_Variant_Count Tumor_Variant_Count Normal_Paired_End_Read_Count Tumor_Paired_End_Read_Count Normal_Split_Read_Count Tumor_Split_Read_Count Annotation Breakpoint_Type Center Connection_Type Event_Info Class Length Comments External_Annotation cbp_driver cbp_driver_annotation cbp_driver_tiers cbp_driver_tiers_annotation is_germline
TCGA-A2-A04P-01 NA KIAA1549 ENST00000242365 1500 7 138536968 KIAA1549-BRAF.K16B10.COSF509_1 NA BRAF ENST00000288602 10 7 140482957 KIAA1549-BRAF.K16B10.COSF509_2 NA GRCh37 no yes NA 1000 NA 900 NA NA NA NA KIAA1549-BRAF.K16B10.COSF509 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF509 NA NA NA NA 0
TCGA-A2-A04P-01 NA NCOA4 ENST00000344348 7 10 51582939 NCOA4-RET.N7R12_1 NA RET ENST00000340058 12 10 43612031 NCOA4-RET.N7R12_2 NA GRCh37 no yes NA 1001 NA 800 NA NA NA NA NCOA4-RET.N7R1 NA NA NA Fusion NA NA Gain-of-Function NA NA NA NA NA 0
TCGA-A2-A04P-01 NA EML4 ENST00000318522 6 2 42492091 EML4-ALK.E6bA20.AB374362_1 NA ALK ENST00000389048 2000 2 29446394 EML4-ALK.E6bA20.AB374362_2 NA GRCh37 no yes NA 1002 NA 700 NA NA NA NA EML4-ALK.E6bA20.AB374362 NA NA NA Fusion NA NA Gain-of-Function GENBANK:AB374362 NA NA NA NA 0
TCGA-A2-A04P-01 NA TMPRSS2 ENST00000332149 1 21 42880007 TMPRSS2-ERG.T1E2.COSF23.1_1 NA ERG ENST00000442448 2 21 39956869 TMPRSS2-ERG.T1E2.COSF23.1_2 NA GRCh37 no yes NA 1003 NA 600 NA NA NA NA TMPRSS2-ERG.T1E2.COSF23.1 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF23 NA NA NA NA 0
TCGA-A2-A04P-01 NA EGFR ENST00000275493 1 7 55087058 EGFR-EGFR.E1E8.DelPositive.1_1 NA EGFR ENST00000275493 8 7 55223522 EGFR-EGFR.E1E8.DelPositive.1_2 NA GRCh37 no yes NA 1004 NA 500 NA NA NA NA EGFR-EGFR.E1E8.DelPositive NA NA NA Fusion NA NA NA NA NA NA NA NA 0
TCGA-A2-A04P-01 NA ALK ENST00000389048 11 2 29497964 ALK-PTPN3.A11P3_1 NA PTPN3 ENST00000374541 3 9 112219679 ALK-PTPN3.A11P3_2 NA GRCh37 no yes NA 1005 NA 400 NA NA NA NA ALK-PTPN3.A11P3 NA NA NA Fusion NA NA NA NA NA NA NA NA 0
TCGA-A1-A0SB-01 NA EML4 ENST00000318522 13 2 42522656 EML4-ALK.E13A20.AB462411_1 NA ALK ENST00000389048 20 2 29446335 EML4-ALK.E13A20.AB462411_2 NA GRCh37 no yes NA 1006 NA 300 NA NA NA NA EML4-ALK.E13A20 NA NA NA Fusion NA NA Gain-of-Function GENBANK:AB462411 NA NA NA NA 0
Sample_ID Site1_Entrez_Gene_Id Site1_Hugo_Symbol Site1_Ensembl_Transcript_Id Site1_Exon Site1_Chromosome Site1_Position Site1_Description Site2_Entrez_Gene_Id Site2_Hugo_Symbol Site2_Ensembl_Transcript_Id Site2_Exon Site2_Chromosome Site2_Position Site2_Description Site2_Effect_On_Frame NCBI_Build DNA_Support RNA_Support Normal_Read_Count Tumor_Read_Count Normal_Variant_Count Tumor_Variant_Count Normal_Paired_End_Read_Count Tumor_Paired_End_Read_Count Normal_Split_Read_Count Tumor_Split_Read_Count Annotation Breakpoint_Type Center Connection_Type Event_Info Class Length Comments External_Annotation cbp_driver cbp_driver_annotation cbp_driver_tiers cbp_driver_tiers_annotation Sv_Status
TCGA-A2-A04P-01 NA KIAA1549 ENST00000242365 1500 7 138536968 KIAA1549-BRAF.K16B10.COSF509_1 NA BRAF ENST00000288602 10 7 140482957 KIAA1549-BRAF.K16B10.COSF509_2 NA GRCh37 no yes NA 1000 NA 900 NA NA NA NA KIAA1549-BRAF.K16B10.COSF509 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF509 NA NA NA NA SOMATIC
TCGA-A2-A04P-01 NA NCOA4 ENST00000344348 7 10 51582939 NCOA4-RET.N7R12_1 NA RET ENST00000340058 12 10 43612031 NCOA4-RET.N7R12_2 NA GRCh37 no yes NA 1001 NA 800 NA NA NA NA NCOA4-RET.N7R1 NA NA NA Fusion NA NA Gain-of-Function NA NA NA NA NA SOMATIC
TCGA-A2-A04P-01 NA EML4 ENST00000318522 6 2 42492091 EML4-ALK.E6bA20.AB374362_1 NA ALK ENST00000389048 2000 2 29446394 EML4-ALK.E6bA20.AB374362_2 NA GRCh37 no yes NA 1002 NA 700 NA NA NA NA EML4-ALK.E6bA20.AB374362 NA NA NA Fusion NA NA Gain-of-Function GENBANK:AB374362 NA NA NA NA SOMATIC
TCGA-A2-A04P-01 NA TMPRSS2 ENST00000332149 1 21 42880007 TMPRSS2-ERG.T1E2.COSF23.1_1 NA ERG ENST00000442448 2 21 39956869 TMPRSS2-ERG.T1E2.COSF23.1_2 NA GRCh37 no yes NA 1003 NA 600 NA NA NA NA TMPRSS2-ERG.T1E2.COSF23.1 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF23 NA NA NA NA SOMATIC
TCGA-A2-A04P-01 NA EGFR ENST00000275493 1 7 55087058 EGFR-EGFR.E1E8.DelPositive.1_1 NA EGFR ENST00000275493 8 7 55223522 EGFR-EGFR.E1E8.DelPositive.1_2 NA GRCh37 no yes NA 1004 NA 500 NA NA NA NA EGFR-EGFR.E1E8.DelPositive NA NA NA Fusion NA NA NA NA NA NA NA NA SOMATIC
TCGA-A2-A04P-01 NA ALK ENST00000389048 11 2 29497964 ALK-PTPN3.A11P3_1 NA PTPN3 ENST00000374541 3 9 112219679 ALK-PTPN3.A11P3_2 NA GRCh37 no yes NA 1005 NA 400 NA NA NA NA ALK-PTPN3.A11P3 NA NA NA Fusion NA NA NA NA NA NA NA NA SOMATIC
TCGA-A1-A0SB-01 NA EML4 ENST00000318522 13 2 42522656 EML4-ALK.E13A20.AB462411_1 NA ALK ENST00000389048 20 2 29446335 EML4-ALK.E13A20.AB462411_2 NA GRCh37 no yes NA 1006 NA 300 NA NA NA NA EML4-ALK.E13A20 NA NA NA Fusion NA NA Gain-of-Function GENBANK:AB462411 NA NA NA NA SOMATIC
Loading

0 comments on commit 4fe21b3

Please sign in to comment.