Skip to content

Commit

Permalink
enable streaming of names; related to Big-Bee-Network/bif#1
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Jul 29, 2024
1 parent 516a43e commit 5606f21
Show file tree
Hide file tree
Showing 10 changed files with 338 additions and 276 deletions.
Original file line number Diff line number Diff line change
@@ -1,80 +1,15 @@
package org.globalbioticinteractions.elton.cmd;

import org.apache.commons.lang.StringUtils;
import org.eol.globi.data.ImportLogger;
import org.eol.globi.domain.InteractType;
import org.eol.globi.data.NodeFactory;
import org.eol.globi.domain.LogContext;
import org.eol.globi.domain.PropertyAndValueDictionary;
import org.eol.globi.domain.Study;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetRegistry;
import org.globalbioticinteractions.elton.util.DatasetProcessorForTSV;
import org.globalbioticinteractions.elton.util.DatasetRegistryUtil;
import org.globalbioticinteractions.elton.util.InteractionWriter;
import org.globalbioticinteractions.elton.util.NodeFactoryForDataset;
import org.globalbioticinteractions.elton.util.NodeFactoryNull;
import org.globalbioticinteractions.elton.util.ProgressUtil;
import org.globalbioticinteractions.elton.util.SpecimenImpl;
import org.globalbioticinteractions.elton.util.StreamUtil;
import org.globalbioticinteractions.elton.util.TabularWriter;
import picocli.CommandLine;

import java.io.PrintStream;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;

import static org.eol.globi.data.DatasetImporterForMetaTable.EVENT_DATE;
import static org.eol.globi.data.DatasetImporterForTSV.ARGUMENT_TYPE_ID;
import static org.eol.globi.data.DatasetImporterForTSV.BASIS_OF_RECORD_ID;
import static org.eol.globi.data.DatasetImporterForTSV.BASIS_OF_RECORD_NAME;
import static org.eol.globi.data.DatasetImporterForTSV.DECIMAL_LATITUDE;
import static org.eol.globi.data.DatasetImporterForTSV.DECIMAL_LONGITUDE;
import static org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_ID;
import static org.eol.globi.data.DatasetImporterForTSV.INTERACTION_TYPE_NAME;
import static org.eol.globi.data.DatasetImporterForTSV.LOCALITY_ID;
import static org.eol.globi.data.DatasetImporterForTSV.LOCALITY_NAME;
import static org.eol.globi.data.DatasetImporterForTSV.REFERENCE_CITATION;
import static org.eol.globi.data.DatasetImporterForTSV.REFERENCE_DOI;
import static org.eol.globi.data.DatasetImporterForTSV.REFERENCE_URL;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_BODY_PART_ID;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_BODY_PART_NAME;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_CATALOG_NUMBER;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_COLLECTION_CODE;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_COLLECTION_ID;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_INSTITUTION_CODE;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_ID;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_LIFE_STAGE_NAME;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_OCCURRENCE_ID;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_SEX_ID;
import static org.eol.globi.data.DatasetImporterForTSV.SOURCE_SEX_NAME;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_ID;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_BODY_PART_NAME;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_CATALOG_NUMBER;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_COLLECTION_CODE;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_COLLECTION_ID;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_INSTITUTION_CODE;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_LIFE_STAGE_ID;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_LIFE_STAGE_NAME;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_OCCURRENCE_ID;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_ID;
import static org.eol.globi.data.DatasetImporterForTSV.TARGET_SEX_NAME;
import static org.eol.globi.domain.PropertyAndValueDictionary.CATALOG_NUMBER;
import static org.eol.globi.domain.PropertyAndValueDictionary.COLLECTION_CODE;
import static org.eol.globi.domain.PropertyAndValueDictionary.COLLECTION_ID;
import static org.eol.globi.domain.PropertyAndValueDictionary.INSTITUTION_CODE;
import static org.eol.globi.domain.PropertyAndValueDictionary.OCCURRENCE_ID;
import static org.eol.globi.service.TaxonUtil.SOURCE_TAXON_ID;
import static org.eol.globi.service.TaxonUtil.SOURCE_TAXON_NAME;
import static org.eol.globi.service.TaxonUtil.SOURCE_TAXON_PATH;
import static org.eol.globi.service.TaxonUtil.SOURCE_TAXON_PATH_IDS;
import static org.eol.globi.service.TaxonUtil.SOURCE_TAXON_PATH_NAMES;
import static org.eol.globi.service.TaxonUtil.SOURCE_TAXON_RANK;
import static org.eol.globi.service.TaxonUtil.TARGET_TAXON_ID;
import static org.eol.globi.service.TaxonUtil.TARGET_TAXON_NAME;
import static org.eol.globi.service.TaxonUtil.TARGET_TAXON_PATH;
import static org.eol.globi.service.TaxonUtil.TARGET_TAXON_PATH_IDS;
import static org.eol.globi.service.TaxonUtil.TARGET_TAXON_PATH_NAMES;
import static org.eol.globi.service.TaxonUtil.TARGET_TAXON_RANK;

@CommandLine.Command(
name = "interactions",
Expand All @@ -83,133 +18,20 @@
)
public class CmdInteractions extends CmdTabularWriterParams {

/**
 * Streams interaction records to a {@link PrintStream} as tab-separated
 * values, one row per interaction claim. The column order produced by
 * {@code write(...)} matches the header emitted by {@code writeHeader()}.
 */
public static class TsvWriter implements InteractionWriter, TabularWriter {
    private final PrintStream out;

    TsvWriter(PrintStream out) {
        this.out = out;
    }

    /**
     * Emits a single TSV row describing the interaction of {@code type}
     * between {@code source} and {@code target} in the context of the
     * given study and dataset.
     */
    @Override
    public void write(SpecimenImpl source, InteractType type, SpecimenImpl target, Study study, Dataset dataset) {
        out.println(StreamUtil.tsvRowOf(getValues(source, type, target, study, dataset)));
    }

    // Assembles the row's column values; order must stay in sync with getKeys().
    private static Stream<String> getValues(SpecimenImpl source, InteractType type, SpecimenImpl target, Study study, Dataset dataset) {
        // first column records whether the claim supports or refutes the interaction
        Stream<String> claim = Stream.of(source.isSupportingClaim()
                ? PropertyAndValueDictionary.SUPPORTS
                : PropertyAndValueDictionary.REFUTES);

        Stream<String> sourceRecordIds = Stream.of(
                valueOrEmpty(source, OCCURRENCE_ID),
                valueOrEmpty(source, CATALOG_NUMBER),
                valueOrEmpty(source, COLLECTION_CODE),
                valueOrEmpty(source, COLLECTION_ID),
                valueOrEmpty(source, INSTITUTION_CODE));

        Stream<String> targetRecordIds = Stream.of(
                valueOrEmpty(target, OCCURRENCE_ID),
                valueOrEmpty(target, CATALOG_NUMBER),
                valueOrEmpty(target, COLLECTION_CODE),
                valueOrEmpty(target, COLLECTION_ID),
                valueOrEmpty(target, INSTITUTION_CODE));

        return Stream.of(
                claim,
                sourceRecordIds,
                StreamUtil.streamOf(source.taxon),
                StreamUtil.streamOf(source),
                Stream.of(type.getIRI(), type.getLabel()),
                targetRecordIds,
                StreamUtil.streamOf(target.taxon),
                StreamUtil.streamOf(target),
                StreamUtil.streamOf(target.getBasisOfRecord()),
                StreamUtil.streamOf(target.getEventDate()),
                StreamUtil.streamOf(target.getSampleLocation()),
                StreamUtil.streamOf(study),
                CmdUtil.datasetInfo(dataset).stream()
        ).flatMap(s -> s);
    }

    // Looks up a specimen property, mapping blank/missing values to "".
    private static String valueOrEmpty(SpecimenImpl specimen, String key) {
        String value = specimen.getProperty(key);
        return StringUtils.isBlank(value) ? "" : value;
    }

    /** Emits the TSV header row matching the column order of {@code write}. */
    @Override
    public void writeHeader() {
        out.println(StreamUtil.tsvRowOf(getKeys()));
    }

    // Column names in row order, followed by the dataset-level header fields.
    private static Stream<String> getKeys() {
        Stream<String> columns = Stream.of(
                ARGUMENT_TYPE_ID,
                SOURCE_OCCURRENCE_ID,
                SOURCE_CATALOG_NUMBER,
                SOURCE_COLLECTION_CODE,
                SOURCE_COLLECTION_ID,
                SOURCE_INSTITUTION_CODE,
                SOURCE_TAXON_ID,
                SOURCE_TAXON_NAME,
                SOURCE_TAXON_RANK,
                SOURCE_TAXON_PATH_IDS,
                SOURCE_TAXON_PATH,
                SOURCE_TAXON_PATH_NAMES,
                SOURCE_BODY_PART_ID,
                SOURCE_BODY_PART_NAME,
                SOURCE_LIFE_STAGE_ID,
                SOURCE_LIFE_STAGE_NAME,
                SOURCE_SEX_ID,
                SOURCE_SEX_NAME,
                INTERACTION_TYPE_ID,
                INTERACTION_TYPE_NAME,
                TARGET_OCCURRENCE_ID,
                TARGET_CATALOG_NUMBER,
                TARGET_COLLECTION_CODE,
                TARGET_COLLECTION_ID,
                TARGET_INSTITUTION_CODE,
                TARGET_TAXON_ID,
                TARGET_TAXON_NAME,
                TARGET_TAXON_RANK,
                TARGET_TAXON_PATH_IDS,
                TARGET_TAXON_PATH,
                TARGET_TAXON_PATH_NAMES,
                TARGET_BODY_PART_ID,
                TARGET_BODY_PART_NAME,
                TARGET_LIFE_STAGE_ID,
                TARGET_LIFE_STAGE_NAME,
                TARGET_SEX_ID,
                TARGET_SEX_NAME,
                BASIS_OF_RECORD_ID,
                BASIS_OF_RECORD_NAME,
                EVENT_DATE,
                DECIMAL_LATITUDE,
                DECIMAL_LONGITUDE,
                LOCALITY_ID,
                LOCALITY_NAME,
                REFERENCE_DOI,
                REFERENCE_URL,
                REFERENCE_CITATION);
        return Stream.concat(columns, StreamUtil.datasetHeaderFields());
    }
}

@Override
public void run() {
run(System.out);
}

void run(PrintStream out) {
TsvWriter writer = new TsvWriter(out);
if (!shouldSkipHeader()) {
writer.writeHeader();
}

DatasetRegistry registry = DatasetRegistryUtil.forCacheDirOrLocalDir(
getCacheDir(),
getWorkDir(),
createInputStreamFactory());

NodeFactoryNull nodeFactory = new NodeFactoryForDataset(writer, new DatasetProcessorForTSV());
NodeFactory nodeFactory = WriterUtil.nodeFactoryForWritingInteractions(!shouldSkipHeader(), out);

CmdUtil.handleNamespaces(
registry,
nodeFactory,
Expand Down
65 changes: 2 additions & 63 deletions src/main/java/org/globalbioticinteractions/elton/cmd/CmdNames.java
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
package org.globalbioticinteractions.elton.cmd;

import org.eol.globi.data.NodeFactory;
import org.eol.globi.data.NodeFactoryException;
import org.eol.globi.domain.Interaction;
import org.eol.globi.domain.Specimen;
import org.eol.globi.domain.Study;
import org.eol.globi.domain.Taxon;
import org.eol.globi.tool.NullImportLogger;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetRegistry;
import org.globalbioticinteractions.elton.util.DatasetRegistryUtil;
import org.globalbioticinteractions.elton.util.NodeFactoryNull;
import org.globalbioticinteractions.elton.util.StreamUtil;
import org.globalbioticinteractions.elton.util.TaxonWriter;
import picocli.CommandLine;

import java.io.PrintStream;
import java.util.stream.Stream;

@CommandLine.Command(
name = "names",
Expand All @@ -31,63 +21,12 @@ public void run() {
}

void run(PrintStream out) {
TaxonWriter writer = createWriter(out);
if (!shouldSkipHeader()) {
writer.writeHeader();
}

DatasetRegistry registry = DatasetRegistryUtil.forCacheDirOrLocalDir(getCacheDir(), getWorkDir(), createInputStreamFactory());
NodeFactory nodeFactory = createFactory(writer);
CmdUtil.handleNamespaces(registry, nodeFactory, getNamespaces(), "listing taxa", getStderr(), new NullImportLogger());
}

/**
 * Creates a {@link TaxonWriter} that streams each taxon as a
 * tab-separated row to {@code out}, one row per taxon/dataset pair.
 */
private TaxonWriter createWriter(PrintStream out) {
    return new TaxonWriter() {

        // Writes one TSV row: taxon fields followed by dataset fields.
        @Override
        public void write(Taxon taxon, Dataset dataset) {
            Stream<String> rowStream = Stream.concat(StreamUtil.streamOf(taxon), StreamUtil.streamOf(dataset));
            String row = StreamUtil.tsvRowOf(rowStream);
            out.println(row);
        }

        // Header column names; must stay in sync with write(...) above.
        @Override
        public void writeHeader() {
            out.println(StreamUtil.tsvRowOf(
                    Stream.concat(Stream.of(
                                    "taxonId",
                                    "taxonName",
                                    "taxonRank",
                                    "taxonPathIds",
                                    "taxonPath",
                                    "taxonPathNames"),
                            StreamUtil.datasetHeaderFields())));
        }
    };
}

private NodeFactoryNull createFactory(TaxonWriter writer) {
return new NodeFactoryNull() {
Dataset dataset;

@Override
public Dataset getOrCreateDataset(Dataset dataset) {
this.dataset = dataset;
return super.getOrCreateDataset(dataset);
}

@Override
public Specimen createSpecimen(Interaction interaction, Taxon taxon) throws NodeFactoryException {
writer.write(taxon, dataset);
return super.createSpecimen(interaction, taxon);
}
NodeFactory nodeFactory = WriterUtil.nodeFactoryForTaxonWriting(!shouldSkipHeader(), out);


@Override
public Specimen createSpecimen(Study study, Taxon taxon) throws NodeFactoryException {
return super.createSpecimen(study, taxon);
}
};
CmdUtil.handleNamespaces(registry, nodeFactory, getNamespaces(), "listing taxa", getStderr(), new NullImportLogger());
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import org.eol.globi.util.InputStreamFactory;
import org.eol.globi.util.ResourceServiceLocal;
import org.eol.globi.util.ResourceServiceLocalAndRemote;
import org.eol.globi.util.ResourceServiceRemote;
import org.globalbioticinteractions.dataset.CitationUtil;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetConstant;
Expand Down Expand Up @@ -133,14 +132,9 @@ private void reviewCachedOrRemote(List<String> namespaces, InputStreamFactory in
private void review(String repoName, DatasetRegistry registry, InputStreamFactory inputStreamFactory) throws StudyImporterException {
final AtomicLong noteCounter = new AtomicLong(0);
final AtomicLong infoCounter = new AtomicLong(0);

ParserFactoryLocal parserFactory = new ParserFactoryLocal(getClass());
AtomicInteger interactionCounter = new AtomicInteger(0);
ReviewReportLogger reviewReportLogger = createReviewReportLogger(repoName, noteCounter, infoCounter);

NodeFactoryLogging nodeFactory = new NodeFactoryLogging(interactionCounter, reviewReportLogger);
DatasetImporterForRegistry studyImporter = new DatasetImporterForRegistry(parserFactory, nodeFactory, registry);
studyImporter.setLogger(reviewReportLogger);
ReviewReportLogger reviewReportLogger = createReviewReportLogger(repoName, noteCounter, infoCounter);

try {
Dataset dataset = new DatasetFactory(
Expand All @@ -153,12 +147,16 @@ private void review(String repoName, DatasetRegistry registry, InputStreamFactor
&& StringUtils.endsWith(citationString, ">")) {
reviewReportLogger.warn(null, "no citation found for dataset at [" + dataset.getArchiveURI() + "]");
}
NodeFactoryLogging nodeFactory = new NodeFactoryLogging(interactionCounter, reviewReportLogger);
nodeFactory.getOrCreateDataset(dataset);
getStderr().print("creating review [" + repoName + "]... ");
if (!shouldSkipHeader()) {
logHeader(getStdout());
}

ParserFactoryLocal parserFactory = new ParserFactoryLocal(getClass());
DatasetImporterForRegistry studyImporter = new DatasetImporterForRegistry(parserFactory, nodeFactory, registry);
studyImporter.setLogger(reviewReportLogger);
DatasetImportUtil.importDataset(
null,
dataset,
Expand Down
Loading

0 comments on commit 5606f21

Please sign in to comment.