Skip to content

Commit

Permalink
Fix allowed sequence characters
Browse files: browse the repository at this point in the history
  • Loading branch information
JudithNeukamm committed Mar 27, 2019
1 parent a54561f commit a27a29d
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/IO/FastaReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ void readSequenceFromFile(String file)
for (int i=0; i< seq.size(); i++)
{
String acc = (String) desc.get(i);
description[i] = acc.split(" ")[0];
description[i] = acc.split(" ")[0].split("\\.")[0];
sequence[i]=(String) seq.get(i);
}

}

private boolean isSequenceValid(String seq) {

String specialCharacters = "[" + "ACGTUWSMKRYBDHVNZ"+ "]+" ;
String specialCharacters = "[" + "ACGTUWSMKRYBDHVNZacgtuwsmkrybdhvnz"+ "]+" ;

if (seq.matches(specialCharacters)) {
return true;
Expand Down
3 changes: 3 additions & 0 deletions src/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public static void main(String[] args) throws Exception {
// init functionality
ArgumentParser optionsParser = new ArgumentParser(args);
Validator validator = new Validator();
System.out.println("Reading fast file...");

mt_sequences_filepath = optionsParser.getFasta();
String[] fileName = mt_sequences_filepath.replaceFirst("[.][^.]+$", "").split("/");
Expand All @@ -34,6 +35,8 @@ public static void main(String[] args) throws Exception {

logfile.write("Data validation report based on files:\n" + mt_sequences_filepath + "\n"+ data_template_filepath + "\n\n");

System.out.println("Running validation...");

validator.validate(data_template_filepath, logfile, fastaheaders, fastaReader.getLog_sequence_corretness());
logfile.close();

Expand Down
6 changes: 3 additions & 3 deletions src/calculations/Validator.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public class Validator {
private List<String> publication_type = Arrays.asList("paper","peerprint","direct submission to genbank","direct submission to mitoDB","article");
private List<String> publication_status = Arrays.asList("published","protected","private","in press",
"in preparation","submitted","unpublished");
private List<String> sequencing_platform = Arrays.asList("illumina","454","sanger","nanopore","pacbio");
private List<String> sequencing_platform = Arrays.asList("illumina","454","sanger","nanopore","pacbio", "affymetrix");


private String log_missing_sequences="";
Expand Down Expand Up @@ -408,7 +408,7 @@ public boolean validate(String data_template, BufferedWriter logfile, List<Strin
string_accession_default += "--------------------------------------------------\n" + accession + ":\n";


if(!fastaheaders.contains(accession)){
if(!fastaheaders.contains(accession.split("\\.")[0])){
log_missing_sequences += "Sequence with this accession ID: "+ accession +" does not exist in fasta file.\n";
}

Expand Down Expand Up @@ -565,7 +565,7 @@ public boolean validate(String data_template, BufferedWriter logfile, List<Strin
String geographic_info_TMA_inferred_region = line_splitted[index_geographic_info_TMA_inferred_region].toLowerCase();
if (geographic_info_TMA_inferred_region.equals("")) {
log_missing_value += "\tGeographic info TMA inferred region is missing.\n";
} else if(!region.contains(geographic_info_TMA_inferred_region)){
} else if(!region.contains(geographic_info_TMA_inferred_region) || !isStringInt(geographic_info_TMA_inferred_region)){
log_incorrect_format += "\tGeographic info TMA inferred region is not in correct format: " + geographic_info_TMA_inferred_region + "\n";
}
}
Expand Down

0 comments on commit a27a29d

Please sign in to comment.