-
Notifications
You must be signed in to change notification settings - Fork 2
/
AnnotationFileDefs.java
259 lines (246 loc) · 9.77 KB
/
AnnotationFileDefs.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package tappas;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
/**
* Class to load annotation file definitions.
* <p>
* These definitions determine how to extract various fields
* from the GFF3 annotation file, i.e. what the field names are.
* It also allows the code to know the name of various
* required sources and features.
* <p>
* The loading approach in dataProject::loadAnnotationsFileDefs() could be changed
* to load a different annotation definition file for the project,
* in cases where users want to use their own annotation file,
* by providing an option in the dialog to specify the definition file's location.
* This is not worth doing until the question of allowing users to create
* their own annotation files has been fully addressed.
* We will probably end up allowing users to create their own annotation files dynamically
* from within our application, in which case the format will be under our control.
*
* @author Hector del Risco - [email protected] & Pedro Salguero - [email protected]
*/
public class AnnotationFileDefs extends AppObject {
    // GO source name and the feature names of its three categories (GO_P, GO_F, GO_C keys)
    String srcGO, goP, goF, goC;
    // source/feature name pairs set from the SAF_* definition keys
    SourceFeatureNames transcript, protein, genomic;
    SourceFeatureNames gene, cds, exon;
    SourceFeatureNames sj, pScore;
    // one entry per source,feature pair listed under SAF_DOMAIN
    ArrayList<SourceFeatureNames> domains = new ArrayList<>();
    // attribute names set from the ATTR_* definition keys
    String attrId, attrName, attrDesc;
    String attrChr, attrPScore, attrSJ;
    String attrPriClass, attrSecClass;
    // value set from the NOVEL_SPLICEJUNCTION definition key
    String valSJ;

    /**
     * Instantiates an AnnotationFileDefs - constructor
     * <p>
     * Must call loadDefinitions before using definitions
     *
     * @param project   project object if applicable, may be null
     * @param logger    logger object if applicable, may be null - defaults to app.logger
     */
    public AnnotationFileDefs(Project project, Logger logger) {
        // forward the caller's logger; the original always passed null and silently ignored it
        // NOTE(review): relies on AppObject falling back to app.logger for null - confirm
        super(project, logger);
    }

    /**
     * Loads annotation file definitions from given file path
     * <p>
     * Original file is provided with the application resources: annotations.def
     * <p>
     * Blank lines and lines starting with '#' are skipped. Every other line must be
     * a valid KEY=VALUE definition. The file is always closed, whether loading
     * succeeds or fails.
     *
     * @param filepath  definition file path
     * @throws java.lang.Exception if the file cannot be read, contains an invalid line,
     *                             or does not define all required values
     */
    public void loadDefinitions(String filepath) throws Exception {
        int lnum = 1;
        BufferedReader br = null;
        try {
            br = Files.newBufferedReader(Paths.get(filepath), StandardCharsets.UTF_8);
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (!line.isEmpty() && line.charAt(0) != '#') {
                    if (!setValue(line)) {
                        throw new Exception("Invalid line, " + lnum + ", found in annotation definitions file.");
                    }
                }
                lnum++;
            }
        } catch (IOException ioe) {
            // keep the original message but preserve the cause for diagnostics
            throw new Exception("Annotation definitions file I/O exception '" + ioe.getMessage() + "'", ioe);
        } finally {
            // runs on every exit path, including the "Invalid line" exception above
            closeDefinitionsFile(br);
        }
        if (!areAllValuesSet()) {
            throw new Exception("Incomplete annotation definitions file.");
        }
    }

    /**
     * Checks to see if given annotation source:feature refers to a protein domain
     * <p>
     * Protein domains are drawn differently in the Gene Data Visualization protein tab
     *
     * @param source    annotation source
     * @param feature   annotation feature
     * @return  returns true if it is a protein domain
     */
    // the DOMAIN feature is special - it is drawn as a semi-transparent block like the CDS for transcripts
    public boolean isDomain(String source, String feature) {
        for (SourceFeatureNames sfn : domains) {
            if (sfn.source.equals(source) && sfn.feature.equals(feature)) {
                return true;
            }
        }
        return false;
    }

    //
    // Internal Functions
    //

    // close the definitions file reader; a close failure is only reported, never thrown,
    // so it cannot mask the outcome of the load itself
    private void closeDefinitionsFile(BufferedReader br) {
        if (br == null) {
            return;
        }
        try {
            br.close();
        } catch (IOException e) {
            String msg = "Closing annotation definitions file exception '" + e.getMessage() + "'";
            if (logger != null) {
                logger.logWarning(msg);
            } else {
                System.out.println(msg);
            }
        }
    }

    // set internal definition value from given definition file line
    // returns false if the line is not a valid definition:
    // not exactly one '=', unknown key, or too few fields for the key
    private boolean setValue(String line) {
        // limit -1 keeps a trailing empty value, e.g. "X=" yields ["X", ""]
        String[] keyValue = line.split("=", -1);
        if (keyValue.length != 2) {
            return false;
        }
        String id = keyValue[0].trim();

        // SAF_DOMAIN can have multiple values: SAF_DOMAIN=src1,ftr1;src2,ftr2
        boolean result = false;
        for (String multival : keyValue[1].trim().split(";")) {
            // limit -1 keeps trailing empty fields, e.g. "src," yields ["src", ""]
            String[] fields = multival.trim().split(",", -1);
            for (int i = 0; i < fields.length; i++) {
                fields[i] = fields[i].trim();
                if (fields[i].equals("N/A")) {
                    fields[i] = "";
                }
            }
            if (!applyDefinition(id, fields)) {
                return false;
            }
            result = true;
        }
        return result;
    }

    // store the fields of a single definition value under the given key
    // returns false for an unknown key or a source/feature key without both fields
    private boolean applyDefinition(String id, String[] fields) {
        // all SAF_* keys need "source,feature"; reject instead of indexing out of bounds
        if (id.startsWith("SAF_") && fields.length < 2) {
            return false;
        }
        switch (id) {
            //
            // Source
            //
            case "SRC_GO":
                srcGO = fields[0];
                break;
            //
            // GO Features
            //
            case "GO_P":
                goP = fields[0];
                break;
            case "GO_F":
                goF = fields[0];
                break;
            case "GO_C":
                goC = fields[0];
                break;
            //
            // Source and Feature
            //
            case "SAF_TRANSCRIPT":
                transcript = new SourceFeatureNames(fields[0], fields[1]);
                break;
            case "SAF_PROTEIN":
                protein = new SourceFeatureNames(fields[0], fields[1]);
                break;
            case "SAF_GENOMIC":
                genomic = new SourceFeatureNames(fields[0], fields[1]);
                break;
            case "SAF_GENE":
                gene = new SourceFeatureNames(fields[0], fields[1]);
                break;
            case "SAF_CDS":
                cds = new SourceFeatureNames(fields[0], fields[1]);
                break;
            case "SAF_EXON":
                exon = new SourceFeatureNames(fields[0], fields[1]);
                break;
            case "SAF_SPLICEJUNCTION":
                sj = new SourceFeatureNames(fields[0], fields[1]);
                break;
            case "SAF_DOMAIN":
                domains.add(new SourceFeatureNames(fields[0], fields[1]));
                break;
            case "SAF_PROTEINSCORE":
                pScore = new SourceFeatureNames(fields[0], fields[1]);
                break;
            //
            // Attributes
            //
            case "ATTR_ID":
                attrId = fields[0];
                break;
            case "ATTR_NAME":
                attrName = fields[0];
                break;
            case "ATTR_DESCRIPTION":
                attrDesc = fields[0];
                break;
            case "ATTR_CHROMOSOME":
                attrChr = fields[0];
                break;
            case "ATTR_PROTEINSCORE":
                attrPScore = fields[0];
                break;
            case "ATTR_SPLICEJUNCTION":
                attrSJ = fields[0];
                break;
            case "ATTR_PRICLASS":
                attrPriClass = fields[0];
                break;
            case "ATTR_SECCLASS":
                attrSecClass = fields[0];
                break;
            //
            // Special Values
            //
            case "NOVEL_SPLICEJUNCTION":
                valSJ = fields[0];
                break;
            default:
                return false;
        }
        return true;
    }

    // check that all definition values were set
    // all definitions are required but can be set to empty string
    // NOTE(review): attrPriClass and attrSecClass are not checked here, so ATTR_PRICLASS and
    // ATTR_SECCLASS are effectively optional - confirm whether that is intended
    private boolean areAllValuesSet() {
        return srcGO != null && goP != null && goF != null && goC != null
                && transcript != null && protein != null && genomic != null
                && gene != null && cds != null && exon != null
                && sj != null && !domains.isEmpty() && pScore != null
                && attrId != null && attrName != null && attrDesc != null
                && attrChr != null && attrPScore != null && attrSJ != null
                && valSJ != null;
    }

    //
    // Data Classes
    //

    /**
     * Pair of annotation source and feature names
     */
    public class SourceFeatureNames {
        String source, feature;

        public SourceFeatureNames(String source, String feature) {
            this.source = source;
            this.feature = feature;
        }
    }
}