Skip to content

Commit

Permalink
Merge pull request #260 from geneontology/issue-255-manual-sgd-pthwy-…
Browse files Browse the repository at this point in the history
…bp-mappings

Parse manually curated file and map YeastPathway IDs to BPs
  • Loading branch information
dustine32 authored May 16, 2023
2 parents 4ba1a97 + b5bda2f commit 8f533df
Show file tree
Hide file tree
Showing 4 changed files with 280 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ enum EntityStrategy {
Map<String, String> accession_neo = new HashMap<String, String>(); //in case we need to store mappings to neo IRIs, use this
Map<String, String> monomerToSgdMappings = new HashMap<String, String>();
Map<String, String> yeastcyc2EC = new HashMap<String, String>(); //used to store mappings from YeastCyc ID to EC number in SGDIDs_to_ExPASy-ECs.txt
Map<String, String> pathwayIdToGoMappings = new HashMap<String, String>();
public BioPaxtoGO(){
strategy = ImportStrategy.NoctuaCuration;
report = new GoMappingReport();
Expand Down Expand Up @@ -206,6 +207,13 @@ void convert(
for (Map.Entry<String, String> sgdMapping : monomerToSgdMappings.entrySet()) {
accession_neo.put(sgdMapping.getKey(), sgdMapping.getValue());
}

// Also also also parse the manually curated yeast_pathway_ids_to_process_gos.tsv
String pathwayIdToGoFilePath = "/YeastCyc/yeast_pathway_ids_to_process_gos.tsv";
pathwayIdToGoMappings = Helper.parsePathwayIdToGoFile(pathwayIdToGoFilePath);
for (Map.Entry<String, String> pathwayIdToGoMapping : pathwayIdToGoMappings.entrySet()) {
pathwayIdToGoMappings.put(pathwayIdToGoMapping.getKey(), pathwayIdToGoMapping.getValue());
}
}

//read biopax pathway(s)
Expand Down Expand Up @@ -869,8 +877,16 @@ else if(process.getModelInterface().equals(Pathway.class)){
}
}
if(!mapped && entityStrategy.equals(EntityStrategy.YeastCyc)) {
Set<String> metacyc_gos = golego.xref_gos.get("MetaCyc:"+model_id);
if(metacyc_gos!=null) {
String pathway_id = getEntityReferenceId(pathway);
String manually_mapped_go = pathwayIdToGoMappings.get(pathway_id);
Set<String> metacyc_gos = golego.xref_gos.get("MetaCyc:"+pathway_id);
// Check first if GO BP is manually mapped
if(manually_mapped_go!=null) {
OWLClass go = go_cam.df.getOWLClass(IRI.create(manually_mapped_go));
go_cam.addTypeAssertion(pathway_e, go);
mapped = true;
}
else if(metacyc_gos!=null) {
for(String goid : metacyc_gos) {
OWLClass go = go_cam.df.getOWLClass(IRI.create(goid));
go_cam.addTypeAssertion(pathway_e, go);
Expand Down
22 changes: 22 additions & 0 deletions exchange/src/main/java/org/geneontology/gocam/exchange/Helper.java
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,29 @@ public static Map<String, String> parseGPI(String gpiFile) throws IOException {

return idLookup;
}

/**
 * Parses the manually curated pathway-ID-to-GO mapping resource into a lookup map.
 *
 * <p>Each usable line holds a pathway ID in the first column and a GO CURIE
 * (for example {@code GO:0006696}) in the second column, separated by whitespace
 * (tab or space). The {@code GO:} prefix of the CURIE is rewritten to
 * {@code http://purl.obolibrary.org/obo/GO_} before it is stored.
 *
 * <p>Lines with fewer than two columns, or whose second column does not start with
 * {@code GO:}, are skipped. If a pathway ID occurs more than once, the last line wins.
 *
 * @param pathwayIdToGoFilePath resource path resolved with {@code Helper.class.getResourceAsStream}
 * @return map from pathway ID to the GO term IRI string
 * @throws IOException if the resource cannot be found or cannot be read
 */
public static Map<String, String> parsePathwayIdToGoFile(String pathwayIdToGoFilePath) throws IOException {
    Map<String, String> pathwayIdToGoLookup = new HashMap<String, String>();

    InputStream pathwayIdToGoStream = Helper.class.getResourceAsStream(pathwayIdToGoFilePath);
    if (pathwayIdToGoStream == null) {
        // getResourceAsStream signals a missing resource with null; report it explicitly
        // instead of letting the reader constructor fail with a bare NullPointerException.
        throw new IOException("Pathway-to-GO mapping resource not found: " + pathwayIdToGoFilePath);
    }

    // try-with-resources closes the reader (and the wrapped stream) even when parsing fails.
    try (BufferedReader ptwyToGoReader = new BufferedReader(new InputStreamReader(pathwayIdToGoStream))) {
        String ptwyToGoLine;
        // Reading in the loop condition guarantees every iteration advances, including
        // iterations that skip a line via continue.
        while ((ptwyToGoLine = ptwyToGoReader.readLine()) != null) {
            String[] cols = ptwyToGoLine.trim().split("\\s+");
            if (cols.length < 2) {
                // Blank or single-column line: nothing to map.
                continue;
            }
            String pathwayId = cols[0];
            String goCurie = cols[1];
            if (!goCurie.startsWith("GO:")) {
                continue;
            }
            String goUri = goCurie.replace("GO:", "http://purl.obolibrary.org/obo/GO_");
            pathwayIdToGoLookup.put(pathwayId, goUri);
        }
    }

    return pathwayIdToGoLookup;
}

public static Map<String, String> parseSgdIdToEcFile(String sgdIdToEcFilePath) throws IOException {
Map<String, Set<String>> ecLookup = new HashMap<String, Set<String>>(); // First track SGDIDs having multiple EC mappings
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
PWY3O-246 GO:0034078
PWY-5760-1 GO:0019483
PWY-2301 GO:0006021
PWY-5041 GO:0033353
PWY-5123 GO:0045337
PWY-5084 GO:0045333
PWY-6543 GO:0046656
YEAST-4AMINOBUTMETAB-PWY GO:0009450
PWY-6122-1 GO:0009260
PWY-6147 GO:0046656
ACETATEUTIL2-PWY GO:0019427
PWY3O-335 GO:0045151
PWY-7220-1 GO:0046085
PWY-7219 GO:0046086
PWY3O-188 GO:0019646
PWY-5694 GO:0000256
PWY-5697 GO:0009442
ARGDEG-YEAST-PWY GO:0006527
ASPARAGINE-DEG2-PWY GO:0006530
ASPBIO-PWY GO:0006532
SO4ASSIM-PWY GO:0019379
BIOTIN-SYNTHESIS-PWY GO:0009102
PWY3O-661 GO:0034079
PWY3O-4 GO:0006853
PWY3O-103 GO:0016024
PWY3O-15 GO:0006031
PWY3O-70 GO:0000271
ARO-PWY-1 GO:0009423
CITRUL-BIO2-PWY GO:0019240
CYSTEINE-SYN2-PWY GO:0019344
DENOVOPURINE3-PWY GO:0006164
YEAST-DE-NOVO-PYRMID-DNT GO:0006207
PYRIMID-RNTSYN-PWY GO:0006207
PWY3O-6-1 GO:0070485
TRIGLSYN-PWY GO:0019432
PWY-6482-1 GO:0017183
PWY3O-1565 GO:0034637
PWY3O-123 GO:0006013
PWY-5670-1 GO:0016104
PWY3O-31704 GO:0006696
PWY-6075-1 GO:0006696
PWY3O-4300 GO:0006068
PWY3O-10 GO:0006633
FASYN-ELONG2-PWY GO:0030497
YEAST-FAO-PWY GO:0019395
PWY3O-31723 GO:0006633
PWY3O-45 GO:0046656
PWY3O-697 GO:0046655
PWY3O-20 GO:0046655
PWY-2201 GO:0035999
PWY-1801-1 GO:0046294
PWY3O-0 GO:0006001
YEAST-GALACT-METAB-PWY GO:0019388
GLUCONEO-PWY-1 GO:0006094
PWY3O-3827 GO:0051156
GLUNH3-PWY GO:0097054
GLUDEG-I-PWY-1 GO:0036242
PWY-3322 GO:0006538
PWY-5177 GO:0046395
GLUTATHIONESYN-PWY GO:0006750
PWYQT-4432 GO:0006751
GLUT-REDOX2-PWY GO:0006749
PWY3O-592 GO:0006749
PWY3O-48 GO:0006114
AERO-GLYCEROL-CAT-PWY GO:0019563
GLYSYN-PWY GO:0019264
GLYSYN-ALA-PWY GO:0019265
GLYSYN-THR-PWY GO:0006545
GLYCLEAV-PWY GO:0019464
PWY3O-4031 GO:0005978
GLYCOCAT-YEAST-PWY GO:0005980
GLYCOLYSIS GO:0061620
GLYOXYLATE-BYPASS GO:0006097
PWY-7222-1 GO:0106387
PWY-7221 GO:0106387
HEME-BIOSYNTHESIS-II GO:0006783
HEXPPSYN-PWY-2 GO:0006744
HOMOCYS-CYS-CONVERT GO:0050667
PWY-6123-1 GO:0009152
PWY3O-402 GO:0032958
PWY3O-4109 GO:0006550
ALANINE-SYN2-PWY GO:0019272
ALANINE-DEG3-PWY GO:0019481
ARGSYNBSUB-PWY GO:0006526
ARG-PRO-PWY GO:0019493
ASPARAGINE-BIOSYNTHESIS-1 GO:0070981
ASPARTATESYN-PWY GO:0019465
HOMOCYSDEGR-PWY-1 GO:0019344
GLUGLNSYN-PWY GO:0004355
GLNSYN-PWY GO:0006542
HISTSYN-PWY GO:0000105
PWY-5344 GO:0071269
HOMOSERSYN-PWY GO:0009090
ILEUSYN-PWY-1 GO:1901705
LEUSYN-PWY-1 GO:0009098
LYSINE-AMINOAD-PWY-2 GO:0051975
LYSDEGII-PWY GO:0019473
PROSYN-PWY GO:0006561
PROUT-PWY GO:0010133
SERSYN-PWY GO:0006564
SERDEG-PWY GO:0006565
HOMOSER-THRESYN-PWY GO:0009088
TRPSYN-PWY-1 GO:0000162
TRYPTOPHAN-DEGRADATION-1 GO:0019442
PWY-5651 GO:0006569
PWY3O-4108 GO:0006572
VALSYN-PWY GO:0009099
PWY-6132 GO:0016126
PWY3O-4112 GO:0006552
GLUCOSE-MANNOSYL-CHITO-DOLICHOL GO:0006488
PWY3O-1743 GO:0019309
PWY3O-238 GO:0009086
PWY3O-64 GO:0071267
PWY-901 GO:0051596
IPPSYN-PWY GO:0008299
PWY-922 GO:0010142
PWY-5971-1 GO:0006633
NADSYN-PWY GO:0009435
PWY-5653 GO:0009435
PWY3O-4106 GO:0034355
PWY3O-4107 GO:0034355
PWY3O-96 GO:0034356
PWY3O-224 GO:0034355
PWY3O-236 GO:0034355
PWY3O-5268 GO:0006636
PWY3O-1874 GO:0019438
PWY3O-1109 GO:0019438
PWY3O-8514 GO:0006633
PWY3O-1801 GO:0006636
PANTOSYN2-PWY GO:0015937
PENTOSE-P-PWY GO:0006098
NONOXIPENT-PWY GO:0009052
OXIDATIVEPENT-PWY GO:0009051
PERIPLASMA-NAD-DEGRADATION GO:0019677
PWY3O-4153 GO:0009094
PWY3O-4115 GO:0006559
PWY3O-6407 GO:0008654
PWY3O-6499 GO:0008654
PWY3O-450 GO:0006657
PWY-5669 GO:0006646
PWY3O-3 GO:0006661
PWY3O-242 GO:0046854
PHOSLIPSYN2-PWY-1 GO:0008654
PWY3O-259 GO:0046474
LIPASYN-PWY-1 GO:0009395
PWY0-662 GO:0004749
PWY-46 GO:0009446
PLPSAL-PWY GO:0009443
PYRUVDEHYD-PWY GO:0006086
PWY3O-440 GO:0045151
YEAST-RIBOSYN-PWY GO:0042727
SAM-PWY GO:0006556
PWY3O-2220 GO:0043101
PWY3O-743 GO:0043101
PWY3O-1 GO:0043101
YEAST-SALV-PYRMID-DNTP GO:0043099
YEAST-RNT-SALV GO:0010138
PWY3O-230 GO:0006564
PWY-5194-1 GO:0019354
BSUBPOLYAMSYN-PWY GO:0008295
ARGSPECAT-PWY GO:0019548
SPHINGOLIPID-SYN-PWY-1 GO:0030148
PWY3O-355 GO:0006633
SUCUTIL-PWY-2 GO:0005987
PWY-5340 GO:0050427
PWY-781 GO:0000103
DETOX1-PWY GO:0019430
PWY3O-981 GO:1902652
PWY-6126-1 GO:0046086
ALLANTOINDEG-PWY GO:0000256
COMPLETE-ARO-PWY-1 GO:0009073
BRANCHED-CHAIN-AA-SYN-PWY-1 GO:0009082
ALL-CHORISMATE-PWY-1 GO:0046417
ERGOSTEROL-SYN-PWY-1 GO:0006696
PWY3O-6336 GO:0019367
PWY3O-5962 GO:0006633
GLUCFERMEN-PWY GO:0019658
PWY3O-13 GO:0097054
PWY3O-114 GO:0006749
PWY-6125 GO:0106387
PWY3O-69 GO:0006783
PRPP-PWY-1 GO:0009165
THRESYN-PWY GO:0009088
PWY3O-954 GO:0009086
PWY3O-351 GO:0071267
PWY3O-4158 GO:0009435
PWY3O-6635 GO:0006654
PHOS-PWY GO:0008654
PWY3O-2 GO:0008654
POLYAMSYN-YEAST-PWY GO:0006596
PWY3O-285 GO:0043101
DENOVOPURINE2-PWY GO:0006164
PWY0-162 GO:0009220
PWY3O-261 GO:0009070
PWY-821-1 GO:0000097
PWY3O-94 GO:0044238
FOLSYN-PWY-1 GO:0035999
P4-PWY-1 GO:0006566
PWY3O-7 GO:0009088
PWY3O-862 GO:0006744
TCA-EUK-PWY GO:0009060
PWY-6614 GO:0046654
PWY3O-50 GO:0033014
PWY3O-17 GO:0009228
THIOREDOX-PWY GO:0006125
THREOCAT2-PWY GO:0006567
TRESYN-PWY GO:0005992
TREDEG-YEAST-PWY GO:0005993
PWY3O-214 GO:0006569
PWY3O-4120 GO:0006571
PWY3O-19 GO:0009058
UDPNAGSYN-YEAST-PWY GO:0006048
PWY-5686-1 GO:0044205
PWY-5703 GO:0043419
PWY-7176 GO:0006207
PWY3O-4105 GO:0006574
PWY-5080-1 GO:0042761
PWY3O-8 GO:0042732
PWY3O-5 GO:0005998
PWY-6074-1 GO:0036197
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,26 @@ public final void testChemicalRoleReplacement() {
assertTrue("There are 'acceptor' type assertions in "+pathway, n==0);
}

/**
 * Checks the manually mapped GO type on the ERGOSTEROL-SYN-PWY-1 pathway node.
 *
 * <p>Builds a SPARQL query that, inside the pathway's named graph, matches the pathway
 * node typed as GO_0006696 together with an rdfs:comment on that node, and expects
 * {@code runQueryAndGetCount} to report exactly 1 for it.
 */
@Test
public final void testYeastPathwayIdToGoMapping() {
    System.out.println("testing GO BP mapping to YeastPathways via manual file");

    final String graphIri = "<http://model.geneontology.org/YeastPathways_ERGOSTEROL-SYN-PWY-1>";
    final String nodeIri = "<http://model.geneontology.org/YeastPathways_ERGOSTEROL-SYN-PWY-1/YeastPathways_ERGOSTEROL-SYN-PWY-1>";
    final String goTypeIri = "<http://purl.obolibrary.org/obo/GO_0006696>";

    // Assemble the query piece by piece; the resulting text is the same as a single concatenation.
    StringBuilder sparql = new StringBuilder();
    sparql.append(" prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ");
    sparql.append("prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ");
    sparql.append("SELECT ?comment \n");
    sparql.append("WHERE {\n");
    sparql.append(" GRAPH ").append(graphIri).append(" { \n");
    sparql.append(" ").append(nodeIri).append(" rdf:type ").append(goTypeIri).append(" . \n");
    sparql.append(" ").append(nodeIri).append(" rdfs:comment ?comment ");
    sparql.append(" }\n");
    sparql.append(" } \n");

    int matchCount = runQueryAndGetCount(sparql.toString());
    assertTrue("Didn't get manually-mapped BP type assertion for "+nodeIri, matchCount==1);
}

@Test
public final void testYeastComplexComponents() {
System.out.println("testing expression of YeastCyc complexes");
Expand Down

0 comments on commit 8f533df

Please sign in to comment.