Skip to content

Commit

Permalink
Add environment variables to compose file and some cleanups after rem…
Browse files Browse the repository at this point in the history
…oval of /etc/massbank.conf
  • Loading branch information
meier-rene committed Jan 29, 2025
1 parent 98df821 commit 3045297
Show file tree
Hide file tree
Showing 119 changed files with 128 additions and 11,885 deletions.
7 changes: 0 additions & 7 deletions Documentation/3rd-party_support.md

This file was deleted.

Empty file removed Documentation/AddMetaData.md
Empty file.
Empty file removed Documentation/Validator.md
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package massbank;

import java.io.IOException;
import java.util.Properties;

public class ProjectPropertiesLoader {

public static Properties loadProperties() {
Properties properties = new Properties();
try {
properties.load(ProjectPropertiesLoader.class.getClassLoader().getResourceAsStream("project.properties"));
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
return properties;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package massbank.cli;

import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
Expand All @@ -10,28 +9,22 @@
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.nio.file.Path;
import java.util.*;
import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import massbank.ProjectPropertiesLoader;
import massbank.RecordParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
Expand All @@ -55,21 +48,18 @@
import de.undercouch.citeproc.bibtex.BibTeXConverter;
import de.undercouch.citeproc.bibtex.BibTeXItemDataProvider;
import io.github.dan2097.jnainchi.InchiStatus;
import io.github.dan2097.jnainchi.JnaInchi;
import massbank.PubchemResolver;
import massbank.Record;
import org.petitparser.context.Result;

import java.util.stream.Collectors;

import static massbank.cli.Validator.parseRecord;

/**
* This class adds meta information automatically where feasible and makes some automatic fixes. Supported functions are:<p>
* {@link doPub}<p>
* {@link doName}<p>
* {@link doLink}
*
*
* @author rmeier
* @version 27-06-2019
* @version 29-01-2025
* @version 29-01-2025
*/
public class AddMetaData {
private static final Logger logger = LogManager.getLogger(AddMetaData.class);
Expand All @@ -82,7 +72,7 @@ public class AddMetaData {
public static String getComptoxID(String INCHIKEY) throws JsonSyntaxException, MalformedURLException, IOException {
Gson gson = new GsonBuilder().setPrettyPrinting().create();
return gson.fromJson(IOUtils.toString(new URL("https://actorws.epa.gov/actorws/chemIdentifier/v01/resolve.json?identifier=" + INCHIKEY),
Charset.forName("UTF-8")), JsonObject.class).getAsJsonObject("DataRow").getAsJsonPrimitive("dtxsid").getAsString();
StandardCharsets.UTF_8), JsonObject.class).getAsJsonObject("DataRow").getAsJsonPrimitive("dtxsid").getAsString();
}

/**
Expand All @@ -96,14 +86,14 @@ public static String getChemspiderID(String INCHIKEY) throws JsonSyntaxException
connection.setRequestMethod("POST");
connection.setRequestProperty("Content-Type", "");
connection.setRequestProperty("apikey", CHEMSPIDER_API_KEY);
OutputStreamWriter writer = new OutputStreamWriter(connection.getOutputStream(), "UTF-8");
OutputStreamWriter writer = new OutputStreamWriter(connection.getOutputStream(), StandardCharsets.UTF_8);
writer.write("{\"inchikey\": \"" + INCHIKEY + "\" }");
writer.close();
String queryID = gson.fromJson(IOUtils.toString(connection.getInputStream(), Charset.forName("UTF-8")), JsonObject.class).getAsJsonPrimitive("queryId").getAsString();
String queryID = gson.fromJson(IOUtils.toString(connection.getInputStream(), StandardCharsets.UTF_8), JsonObject.class).getAsJsonPrimitive("queryId").getAsString();
connection.setRequestMethod("GET");
connection.setRequestProperty("Content-Type", "");
connection.setRequestProperty("apikey", CHEMSPIDER_API_KEY);
return gson.fromJson(IOUtils.toString(connection.getInputStream(), Charset.forName("UTF-8")), JsonObject.class).getAsJsonArray("results").get(0).getAsString();
return gson.fromJson(IOUtils.toString(connection.getInputStream(), StandardCharsets.UTF_8), JsonObject.class).getAsJsonArray("results").get(0).getAsString();
}


Expand All @@ -130,7 +120,7 @@ public static String doPub(Record record, String recordstring) {
String formated_citation=null;
try {
// URL encode the doi
String EncDoi=URLEncoder.encode(doi, StandardCharsets.UTF_8.toString());
String EncDoi=URLEncoder.encode(doi, StandardCharsets.UTF_8);
URLConnection conn = new URL("https://www.doi.org/"+EncDoi).openConnection();
conn.setRequestProperty("Accept", "application/x-bibtex");
BibTeXItemDataProvider p = new BibTeXItemDataProvider();
Expand Down Expand Up @@ -180,13 +170,13 @@ public static String doName(Record record, String recordstring) {
duplicates.add(c);
}
}
if (duplicates.size()>0) {
if (!duplicates.isEmpty()) {
for (String d : duplicates) {
// find first occurrence
String fullDup = "CH$NAME: " + d + "\n";
int index = recordstring.indexOf(fullDup)+fullDup.length();
String begining = recordstring.substring(0, index);
String end = recordstring.substring(index, recordstring.length()).replace(fullDup, "");
String end = recordstring.substring(index).replace(fullDup, "");
recordstring = begining + end;
}
}
Expand Down Expand Up @@ -368,7 +358,7 @@ else if (ret == InchiStatus.ERROR) {
ch_link.put("INCHIKEY", INCHIKEY);
//sort
ch_link=ch_link.entrySet().stream()
.sorted(Map.Entry.comparingByKey())
.sorted(Entry.comparingByKey())
.collect(Collectors.toMap(Entry::getKey, Entry::getValue, (u, v) -> u, LinkedHashMap::new));
record.CH_LINK(ch_link);
}
Expand Down Expand Up @@ -423,7 +413,7 @@ else if (ret == InchiStatus.ERROR) {
ch_link.put("INCHIKEY", INCHIKEY);
//sort
ch_link=ch_link.entrySet().stream()
.sorted(Map.Entry.comparingByKey())
.sorted(Entry.comparingByKey())
.collect(Collectors.toMap(Entry::getKey, Entry::getValue, (u, v) -> u, LinkedHashMap::new));
record.CH_LINK(ch_link);
}
Expand Down Expand Up @@ -489,121 +479,95 @@ else if (ret == InchiStatus.ERROR) {

public static void main(String[] arguments) throws Exception {
// load version and print
final Properties properties = new Properties();
try {
properties.load(ClassLoader.getSystemClassLoader().getResourceAsStream("project.properties"));
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
Properties properties = ProjectPropertiesLoader.loadProperties();
System.out.println("AddMetaData version: " + properties.getProperty("version"));

// parse command line
Options options = new Options();
options.addOption("a", "all", false, "execute all operations");
options.addOption("p", "publication", false, "format PUBLICATION tag from given DOI to follow the guidelines of ACS");
options.addOption("n", "name", false, "fix common problems in CH$NAME tag");
options.addOption("l", "link", false, "add links to CH$LINK tag");
options.addOption("r", "rewrite", false, "read and rewrite the file.");
options.addOption("ms_focused_ion", false, "Inspect MS$FOCUSED_ION");
options.addOption(null, "add-inchikey", false, "Add or fix InChIKey from the value in CH$IUPAC");
options.addOption(null, "add-pubchemcid", false, "Add or fix PubChem CID from InChIKey and flag Problems.");
options.addOption(null, "smiles-from-inchi", false, "Set SMILES from existing InChi.");
CommandLine cmd = null;
try {
cmd = new DefaultParser().parse( options, arguments);
}
catch(ParseException e) {
// oops, something went wrong
System.err.println( "Parsing command line failed. Reason: " + e.getMessage() );
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("AddMetaData [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
System.exit(1);
}
if (cmd.getArgList().size() == 0) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("AddMetaData [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
System.exit(1);
}

// find all files in arguments and all *.txt files in directories and subdirectories
// specified in arguments
List<File> recordfiles = new ArrayList<>();
for (String argument : cmd.getArgList()) {
File argumentf = new File(argument);
if (argumentf.isFile() && FilenameUtils.getExtension(argument).equals("txt")) {
recordfiles.add(argumentf);
} else if (argumentf.isDirectory()) {
recordfiles.addAll(FileUtils.listFiles(argumentf, new String[] {"txt"}, true));
} else {
logger.warn("Argument " + argument + " could not be processed.");
}
}

if (recordfiles.size() == 0 ) {
logger.error("No files found.");
System.exit(1);
}

// validate all files
logger.trace("Validating " + recordfiles.size() + " files");
CommandLine cmd = parseCommandLine(arguments);
AtomicBoolean doAddPubchemCid = new AtomicBoolean(cmd.hasOption("add-pubchemcid"));
AtomicBoolean doSetSMILESfromInChi = new AtomicBoolean(cmd.hasOption("smiles-from-inchi"));
recordfiles.parallelStream().forEach(filename -> {
String recordString;
logger.info("Working on " + filename + ".");
try {
recordString = FileUtils.readFileToString(filename, StandardCharsets.UTF_8);
// read record in less strict mode
RecordParser recordparser = new RecordParser(new HashSet<>());
Result res = recordparser.parse(recordString);
Record record = null;
if (res.isFailure()) {
System.err.println( "Parsing of \""+ filename + "\" failed. Exiting.");
System.exit(1);
} else {
record = res.get();
}
if (record.DEPRECATED()) {
System.exit(0);
}

String recordstring2 = recordString;
List<Path> recordFiles = Validator.findRecordFiles(cmd.getArgList());
if (recordFiles.isEmpty()) {
logger.error("No files found for validation.");
System.exit(1);
}

RecordParser recordparser = new RecordParser(new HashSet<>());
recordFiles.parallelStream()
.map(Validator::readFile)
.filter(Objects::nonNull)
.forEach(recordString -> {
logger.info("Working on {}.", recordString.getKey());
Record record = parseRecord(recordString, recordparser);
if (record == null || record.DEPRECATED()) return;

String recordStringAfterMod = recordString.getValue();
//if (cmd.hasOption("p") || cmd.hasOption("a")) recordstring2=doPub(record, recordstring2);
//if (cmd.hasOption("n") || cmd.hasOption("a")) recordstring2=doName(record, recordstring2);
//if (cmd.hasOption("l") || cmd.hasOption("a")) recordstring2=doLink(record);
//if (cmd.hasOption("ms_focused_ion") || cmd.hasOption("a")) recordstring2=doFocusedIon(record, recordstring2);

//if (cmd.hasOption("add-inchikey")) {
// recordstring2=doAddInchikey(record);
//}



if (doAddPubchemCid.get()) {
recordstring2=doAddPubchemCID(record);
recordStringAfterMod=doAddPubchemCID(record);
}
if (doSetSMILESfromInChi.get()) {
recordstring2=doSetSMILESfromInChi(record);
recordStringAfterMod=doSetSMILESfromInChi(record);
}
if (!recordString.equals(recordstring2)) {
res = recordparser.parse(recordstring2);
Record record2 =null;
if (res.isFailure()) {

if (!recordString.getValue().equals(recordStringAfterMod)) {
Record recordAfterMod = parseRecord(new SimpleEntry<>(recordString.getKey(), recordStringAfterMod), recordparser);

if (recordAfterMod == null) {
System.err.println( "Validation of new created record file failed. Do not write.");
} else {
try {
FileUtils.write(filename, recordstring2, StandardCharsets.UTF_8);
FileUtils.write(recordString.getKey().toFile(), recordStringAfterMod, StandardCharsets.UTF_8);
}
catch(IOException exp) {
System.err.println( "Writing file \""+ filename + "\" failed. Reason: " + exp.getMessage() );
System.err.println( "Writing file \""+ recordString.getKey() + "\" failed. Reason: " + exp.getMessage() );
System.exit(1);
}
}
}
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
});
});
}

private static CommandLine parseCommandLine(String[] arguments) {
// parse command line
Options options = new Options();
options.addOption("a", "all", false, "execute all operations");
options.addOption("p", "publication", false, "format PUBLICATION tag from given DOI to follow the guidelines of ACS");
options.addOption("n", "name", false, "fix common problems in CH$NAME tag");
options.addOption("l", "link", false, "add links to CH$LINK tag");
options.addOption("r", "rewrite", false, "read and rewrite the file.");
options.addOption("ms_focused_ion", false, "Inspect MS$FOCUSED_ION");
options.addOption(null, "add-inchikey", false, "Add or fix InChIKey from the value in CH$IUPAC");
options.addOption(null, "add-pubchemcid", false, "Add or fix PubChem CID from InChIKey and flag Problems.");
options.addOption(null, "smiles-from-inchi", false, "Set SMILES from existing InChi.");
CommandLine cmd = null;
try {
cmd = new DefaultParser().parse( options, arguments);
}
catch(ParseException e) {
// oops, something went wrong
System.err.println( "Parsing command line failed. Reason: " + e.getMessage() );
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("AddMetaData [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
System.exit(1);
}
if (cmd.getArgList().isEmpty()) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("AddMetaData [OPTIONS] <FILE|DIR> [<FILE|DIR> ...]", options);
System.exit(1);
}


return cmd;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
******************************************************************************/
package massbank.cli;

import massbank.ProjectPropertiesLoader;
import massbank.Record;
import massbank.RecordParser;
import org.apache.commons.cli.*;
Expand Down Expand Up @@ -59,13 +60,7 @@ static String getResourceFileAsString(String fileName) throws IOException {

public static void main(String[] arguments) throws Exception {
// load version and print
final Properties properties = new Properties();
try {
properties.load(ClassLoader.getSystemClassLoader().getResourceAsStream("project.properties"));
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
Properties properties = ProjectPropertiesLoader.loadProperties();
System.out.println("Inspector version: " + properties.getProperty("version"));

// parse command line
Expand Down
Loading

0 comments on commit 3045297

Please sign in to comment.