From fd359f53ceb94dfef69b959b169fd0b31cf26cd6 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 13 Oct 2022 12:21:11 -0400 Subject: [PATCH 01/79] Next development iteration `v0.4.7-SNAPSHOT`. Signed-off-by: Daniel Danis --- README.md | 2 +- phenopacket-tools-builder/pom.xml | 2 +- phenopacket-tools-cli/pom.xml | 2 +- .../src/main/java/org/phenopackets/phenopackettools/Main.java | 2 +- phenopacket-tools-converter/pom.xml | 2 +- phenopacket-tools-test/pom.xml | 2 +- phenopacket-tools-util/pom.xml | 2 +- phenopacket-tools-validator-core/pom.xml | 2 +- phenopacket-tools-validator-jsonschema/pom.xml | 2 +- pom.xml | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 9d2e4fd6..5fbeddb6 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The cli application works in a standard UNIX-like manner. ```shell cd phenopacket-tools ./mvnw package -PXF_VERSION="0.4.6" +PXF_VERSION="0.4.7-SNAPSHOT" alias pfx-tools="java -jar $(pwd)/phenopacket-tools-cli/target/phenopacket-tools-cli-${PXF_VERSION}.jar" pfx-tools --help ``` diff --git a/phenopacket-tools-builder/pom.xml b/phenopacket-tools-builder/pom.xml index f403a9ae..1d27d63d 100644 --- a/phenopacket-tools-builder/pom.xml +++ b/phenopacket-tools-builder/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7-SNAPSHOT phenopacket-tools-builder diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index 1ac56d86..750e3107 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7-SNAPSHOT phenopacket-tools-cli diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java index ffb537f2..d3ea55ab 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java +++ 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java @@ -26,7 +26,7 @@ public class Main { public static final String HEADER = "phenopacket-tools\nAn application for creating, converting and validating GA4GH phenopackets.\n"; - public static final String VERSION = "phenopacket-tools v0.4.6"; + public static final String VERSION = "phenopacket-tools v0.4.7-SNAPSHOT"; // Maximum number of characters in line of the usage message. public static final int USAGE_WIDTH = 120; diff --git a/phenopacket-tools-converter/pom.xml b/phenopacket-tools-converter/pom.xml index c8628e2a..843db16e 100644 --- a/phenopacket-tools-converter/pom.xml +++ b/phenopacket-tools-converter/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7-SNAPSHOT phenopacket-tools-converter diff --git a/phenopacket-tools-test/pom.xml b/phenopacket-tools-test/pom.xml index 36a9d7bf..8d9730f5 100644 --- a/phenopacket-tools-test/pom.xml +++ b/phenopacket-tools-test/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.6 + 0.4.7-SNAPSHOT 4.0.0 diff --git a/phenopacket-tools-util/pom.xml b/phenopacket-tools-util/pom.xml index 575e8e8f..b7ed424b 100644 --- a/phenopacket-tools-util/pom.xml +++ b/phenopacket-tools-util/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.6 + 0.4.7-SNAPSHOT 4.0.0 diff --git a/phenopacket-tools-validator-core/pom.xml b/phenopacket-tools-validator-core/pom.xml index 4e848013..50d38a2b 100644 --- a/phenopacket-tools-validator-core/pom.xml +++ b/phenopacket-tools-validator-core/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7-SNAPSHOT phenopacket-tools-validator-core diff --git a/phenopacket-tools-validator-jsonschema/pom.xml b/phenopacket-tools-validator-jsonschema/pom.xml index 84f949d4..b27e0686 100644 --- a/phenopacket-tools-validator-jsonschema/pom.xml +++ b/phenopacket-tools-validator-jsonschema/pom.xml @@ -7,7 +7,7 
@@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7-SNAPSHOT phenopacket-tools-validator-jsonschema diff --git a/pom.xml b/pom.xml index 000821e0..b4c37468 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.6 + 0.4.7-SNAPSHOT pom From f848e0022ac6d2f8b889fb2208faa27cfba64d23 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 13 Oct 2022 16:29:53 -0400 Subject: [PATCH 02/79] Allow to specify a verbosity. Signed-off-by: Daniel Danis --- .../src/main/java/module-info.java | 1 + .../phenopackettools/command/BaseCommand.java | 61 ++++++++++++++++--- .../command/BaseIOCommand.java | 2 +- .../command/ConvertCommand.java | 5 +- .../command/ExamplesCommand.java | 21 +++---- .../command/ValidateCommand.java | 5 +- .../src/main/resources/logback.xml | 8 +-- 7 files changed, 64 insertions(+), 39 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/module-info.java b/phenopacket-tools-cli/src/main/java/module-info.java index dbb1b933..04e7998b 100644 --- a/phenopacket-tools-cli/src/main/java/module-info.java +++ b/phenopacket-tools-cli/src/main/java/module-info.java @@ -13,6 +13,7 @@ requires commons.csv; requires info.picocli; requires org.slf4j; + requires logback.classic; opens org.phenopackets.phenopackettools.command to info.picocli; opens org.phenopackets.phenopackettools.command.validate to info.picocli; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java index 929fd50a..be6bd20c 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java @@ -1,8 +1,11 @@ package org.phenopackets.phenopackettools.command; +import ch.qos.logback.classic.Level; +import 
ch.qos.logback.classic.LoggerContext; import org.phenopackets.phenopackettools.Main; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import picocli.CommandLine; import java.io.IOException; import java.io.InputStream; @@ -13,19 +16,48 @@ public abstract class BaseCommand implements Callable { private static final Logger LOGGER = LoggerFactory.getLogger(BaseCommand.class); - protected static final String BANNER = readBanner(); - protected static final Properties APPLICATION_PROPERTIES = readApplicationProperties(); protected static final String PHENOPACKET_TOOLS_VERSION = APPLICATION_PROPERTIES.getProperty("phenopacket-tools.version", "UNKNOWN-version"); - private static String readBanner() { - try (InputStream is = Main.class.getResourceAsStream("banner.txt")) { - return is == null ? "" : new String(is.readAllBytes()); - } catch (IOException e) { - LOGGER.error("Unable to read banner. Please report to the developers: {}", e.getMessage(), e); - return ""; + @CommandLine.Option(names = {"-v"}, description = {"Specify multiple -v options to increase verbosity.", + "For example, `-v -v -v` or `-vvv`"}) + public boolean[] verbosity = {}; + + @Override + public Integer call() { + // (0) Setup verbosity and print banner. + setupLoggingAndPrintBanner(); + + // (1) Run the command functionality. 
+ return execute(); + } + + protected abstract Integer execute(); + + private void setupLoggingAndPrintBanner() { + Level level = parseVerbosityLevel(); + + LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory(); + context.getLogger(Logger.ROOT_LOGGER_NAME).setLevel(level); + + if (!(level.equals(Level.WARN) || level.equals(Level.ERROR))) + printBanner(); + } + + private Level parseVerbosityLevel() { + int verbosity = 0; + for (boolean a : this.verbosity) { + if (a) verbosity++; } + + return switch (verbosity) { + case 0 -> Level.WARN; + case 1 -> Level.INFO; + case 2 -> Level.DEBUG; + case 3 -> Level.TRACE; + default -> Level.ALL; + }; } private static Properties readApplicationProperties() { @@ -39,8 +71,17 @@ private static Properties readApplicationProperties() { return properties; } - protected static void printBanner() { - System.err.println(BANNER); + private static void printBanner() { + System.err.println(readBanner()); + } + + private static String readBanner() { + try (InputStream is = Main.class.getResourceAsStream("banner.txt")) { + return is == null ? "" : new String(is.readAllBytes()); + } catch (IOException e) { + LOGGER.error("Unable to read banner. 
Please report to the developers: {}", e.getMessage(), e); + return ""; + } } } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java index 183f2013..070e9ddd 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java @@ -27,7 +27,7 @@ public abstract class BaseIOCommand extends BaseCommand { private static final Logger LOGGER = LoggerFactory.getLogger(BaseIOCommand.class); - @CommandLine.ArgGroup(validate = false, heading = "Inputs:%s") + @CommandLine.ArgGroup(validate = false, heading = "Inputs:%n") public InputSection inputSection = new InputSection(); public static class InputSection { diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index 48a8528a..f2221961 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -51,10 +51,7 @@ public static class ConvertSection { } @Override - public Integer call() { - // (0) Print banner. 
- printBanner(); - + protected Integer execute() { if (!checkInputArgumentsAreOk()) return 1; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java index e46b69e3..4c71a45a 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java @@ -1,7 +1,5 @@ package org.phenopackets.phenopackettools.command; - - import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.JsonNodeFactory; @@ -20,7 +18,6 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.concurrent.Callable; @Command(name = "examples", mixinStandardHelpOptions = true, @@ -34,21 +31,19 @@ public class ExamplesCommand extends BaseCommand { @Override - public Integer call() throws Exception { - printBanner(); - - Path phenopacketDir = createADirectoryIfDoesNotExist(output.resolve("phenopackets")); - Path familyDir = createADirectoryIfDoesNotExist(output.resolve("families")); - Path cohortDir = createADirectoryIfDoesNotExist(output.resolve("cohorts")); - + protected Integer execute() { try { + Path phenopacketDir = createADirectoryIfDoesNotExist(output.resolve("phenopackets")); + Path familyDir = createADirectoryIfDoesNotExist(output.resolve("families")); + Path cohortDir = createADirectoryIfDoesNotExist(output.resolve("cohorts")); + // Phenopackets output(new AtaxiaWithVitaminEdeficiency().getPhenopacket(), phenopacketDir, "AVED"); output(new BethlehamMyopathy().getPhenopacket(), phenopacketDir, "bethleham-myopathy"); output(new Holoprosencephaly5().getPhenopacket(), phenopacketDir, "holoprosencephaly5"); output(new Marfan().getPhenopacket(), 
phenopacketDir, "marfan"); output(new NemalineMyopathyPrenatal().getPhenopacket(), phenopacketDir, "nemalineMyopathy"); - output(new Pseudoexfoliation().getPhenopacket(), phenopacketDir,"pseudoexfoliation"); + output(new Pseudoexfoliation().getPhenopacket(), phenopacketDir, "pseudoexfoliation"); output(new DuchenneExon51Deletion().getPhenopacket(), phenopacketDir, "duchenne"); output(new SquamousCellCancer().getPhenopacket(), phenopacketDir, "squamous-cell-esophageal-carcinoma"); output(new UrothelialCancer().getPhenopacket(), phenopacketDir, "urothelial-cancer"); @@ -80,10 +75,10 @@ private static void output(Message phenopacket, Path outDir, String basename) { String yamlName = basename + ".yml"; outputYamlPhenopacket(phenopacket, outDir, yamlName); String jsonName = basename + ".json"; - outputPhenopacket(phenopacket, outDir,jsonName); + outputPhenopacket(phenopacket, outDir, jsonName); } - private static void outputPhenopacket(Message phenopacket, Path outdir,String fileName) { + private static void outputPhenopacket(Message phenopacket, Path outdir, String fileName) { outputJsonMessage(phenopacket, outdir, fileName); } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java index 843ad62e..50f25322 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java @@ -49,10 +49,7 @@ public static class ValidateSection { } @Override - public Integer call() { - // (0) Print banner. - printBanner(); - + protected Integer execute() { // (1) Read the input v2 message(s). 
List messages = readMessagesOrExit(PhenopacketSchemaVersion.V2); diff --git a/phenopacket-tools-cli/src/main/resources/logback.xml b/phenopacket-tools-cli/src/main/resources/logback.xml index 7b5f00ec..6d55578e 100644 --- a/phenopacket-tools-cli/src/main/resources/logback.xml +++ b/phenopacket-tools-cli/src/main/resources/logback.xml @@ -3,19 +3,13 @@ - - INFO - System.err ${pattern} - - - - + \ No newline at end of file From 9d4046a07dc7a20745ed62e580e1a5c46d1230c6 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 13 Oct 2022 16:32:49 -0400 Subject: [PATCH 03/79] Describe the verbosity in the docs. Signed-off-by: Daniel Danis --- docs/cli.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/cli.rst b/docs/cli.rst index ef383414..f60a64b2 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -70,6 +70,10 @@ In the next sections, we will run *phenopacket-tools* by using the following ali $ alias pxf="java -jar phenopacket-tools-cli-${project.version}.jar" +.. note:: + The commands report warnings and errors by default. Use `-v` to increase the verbosity and see what's + going on under the hood. The `-v` can be specified multiple times (e.g. `-vvv`). + *examples* - generate examples of the top-level elements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 8f39884e0629ab0ac8405a4e59dcdb9060b473e9 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 14 Oct 2022 15:50:25 -0400 Subject: [PATCH 04/79] Add `ValidationWorkflowRunnerBuilder`. Signed-off-by: Daniel Danis --- .../core/ValidationWorkflowRunner.java | 7 ++ .../core/ValidationWorkflowRunnerBuilder.java | 71 +++++++++++ .../src/main/java/module-info.java | 6 + ... 
BaseValidationWorkflowRunnerBuilder.java} | 13 +- .../JsonSchemaValidationWorkflowRunner.java | 115 +++++++----------- ...SchemaValidationWorkflowRunnerBuilder.java | 86 +++++++++++++ 6 files changed, 219 insertions(+), 79 deletions(-) create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java rename phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/{ValidationWorkflowRunnerBuilder.java => BaseValidationWorkflowRunnerBuilder.java} (85%) create mode 100644 phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java index 82a7448d..76fc8a37 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java @@ -16,6 +16,13 @@ *

* Validator provides a list with {@link ValidatorInfo} that describes validations * done by the {@link ValidationWorkflowRunner}. + *

+ * The validation is generally done in 2 phases, syntax and semantic phases. + * The syntax phase checks if the building blocks meet the requirements independently + * (e.g. all required fields are defined for a {@link org.phenopackets.schema.v2.core.Resource}). + * The semantic validation checks for presence of errors in the context of the entire top-level element + * (e.g. a phenopacket contains an HPO term but an HPO {@link org.phenopackets.schema.v2.core.Resource} is missing + * in {@link org.phenopackets.schema.v2.core.MetaData}). * * @param type of the top-level element of the Phenopacket schema. */ diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java new file mode 100644 index 00000000..e4018af0 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java @@ -0,0 +1,71 @@ +package org.phenopackets.phenopackettools.validator.core; + +import com.google.protobuf.MessageOrBuilder; + +import java.util.ArrayList; +import java.util.List; + +/** + * The base builder for constructing {@link ValidationWorkflowRunner}. The builder keeps track of + * the syntax validators and semantic validators. + * @param + */ +public abstract class ValidationWorkflowRunnerBuilder { + + protected final List> syntaxValidators = new ArrayList<>(); + protected final List> semanticValidators = new ArrayList<>(); + + /** + * Add a syntax validator. + * + * @param syntaxValidator the syntax validator + * @return the builder + */ + public ValidationWorkflowRunnerBuilder addSyntaxValidator(PhenopacketValidator syntaxValidator) { + this.syntaxValidators.add(syntaxValidator); + return this; + } + + /** + * Add syntax validators in bulk. 
+ * + * @param validators the syntax validators + * @return the builder + */ + public ValidationWorkflowRunnerBuilder addAllSyntaxValidators(List> validators) { + // A slightly more efficient implementation comparing to the default method on the interface. + this.syntaxValidators.addAll(validators); + return this; + } + + /** + * Add a semantic validator. + * + * @param semanticValidator the semantic validator + * @return the builder + */ + public ValidationWorkflowRunnerBuilder addSemanticValidator(PhenopacketValidator semanticValidator) { + this.semanticValidators.add(semanticValidator); + return this; + } + + /** + * Add semantic validators in bulk. + * + * @param validators the semantic validators + * @return the builder + */ + public ValidationWorkflowRunnerBuilder addAllSemanticValidators(List> validators) { + // A slightly more efficient implementation comparing to the default method on the interface. + this.semanticValidators.addAll(validators); + return this; + } + + /** + * Finish building of the {@link ValidationWorkflowRunner}. + * + * @return the runner + */ + public abstract ValidationWorkflowRunner build(); + +} diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java index 092a770f..a70afb52 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java @@ -1,3 +1,9 @@ +/** + * The module provides a {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} implementation + * backed by a JSON schema validator. 
+ * + * @see org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner + */ module org.phenopackets.phenopackettools.validator.jsonschema { requires org.phenopackets.phenopackettools.util; requires transitive org.phenopackets.phenopackettools.validator.core; diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/ValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java similarity index 85% rename from phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/ValidationWorkflowRunnerBuilder.java rename to phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java index 5d75fc92..d21ec40c 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/ValidationWorkflowRunnerBuilder.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/BaseValidationWorkflowRunnerBuilder.java @@ -18,14 +18,14 @@ import java.util.List; /** - * A utility class that provides {@link JsonSchemaValidationWorkflowRunner.Builder} implementations for top-level + * A utility class that provides {@link JsonSchemaValidationWorkflowRunnerBuilder} implementations for top-level * elements of Phenopacket schema. *

* The class exists because we do not want to expose {@link JsonSchemaValidator} to the outside world. */ -abstract class ValidationWorkflowRunnerBuilder extends JsonSchemaValidationWorkflowRunner.Builder { +abstract class BaseValidationWorkflowRunnerBuilder extends JsonSchemaValidationWorkflowRunnerBuilder { - private static final Logger LOGGER = LoggerFactory.getLogger(ValidationWorkflowRunnerBuilder.class); + private static final Logger LOGGER = LoggerFactory.getLogger(BaseValidationWorkflowRunnerBuilder.class); @Override public JsonSchemaValidationWorkflowRunner build() { @@ -33,6 +33,7 @@ public JsonSchemaValidationWorkflowRunner build() { return new JsonSchemaValidationWorkflowRunner<>(getFormatConverter(), getBaseRequirementsValidator(), requirementValidators, + syntaxValidators, semanticValidators); } @@ -56,7 +57,7 @@ private List readRequirementValidators(List schemaUrls return requirementValidators; } - static class PhenopacketWorkflowRunnerBuilder extends ValidationWorkflowRunnerBuilder { + static class PhenopacketWorkflowRunnerBuilder extends BaseValidationWorkflowRunnerBuilder { @Override protected PhenopacketFormatConverter getFormatConverter() { @@ -69,7 +70,7 @@ protected JsonSchemaValidator getBaseRequirementsValidator() { } } - static class FamilyWorkflowRunnerBuilder extends ValidationWorkflowRunnerBuilder { + static class FamilyWorkflowRunnerBuilder extends BaseValidationWorkflowRunnerBuilder { @Override protected PhenopacketFormatConverter getFormatConverter() { return PhenopacketFormatConverters.familyConverter(); @@ -82,7 +83,7 @@ protected JsonSchemaValidator getBaseRequirementsValidator() { } - static class CohortWorkflowRunnerBuilder extends ValidationWorkflowRunnerBuilder { + static class CohortWorkflowRunnerBuilder extends BaseValidationWorkflowRunnerBuilder { @Override protected PhenopacketFormatConverter getFormatConverter() { return PhenopacketFormatConverters.cohortConverter(); diff --git 
a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java index 28918f30..ae1ee8e2 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java @@ -13,9 +13,6 @@ import org.phenopackets.schema.v2.FamilyOrBuilder; import org.phenopackets.schema.v2.PhenopacketOrBuilder; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -25,17 +22,18 @@ * Validates if given top-level element satisfies the following criteria: *

    *
  • data format requirements - for instance if the element is a valid JSON document if JSON input is provided
  • - *
  • basic Phenopacket schema requirements - the requirements described by the reference documentation. + *
  • basic Phenopacket schema syntax requirements - the requirements described by the reference documentation. * Absence of a required field is an {@link ValidationLevel#ERROR} and absence of a recommended field is - * a {@link ValidationLevel#WARNING}.
  • - *
  • custom requirements - requirements provided in a JSON schema document(s) provided by the user.
  • - *
  • semantic requirements - requirements checked by {@link PhenopacketValidator}s provided by the user.
  • + * a {@link ValidationLevel#WARNING}, + *
  • custom syntax requirements - requirements provided in a JSON schema document(s) provided by the user,
  • + *
  • syntax requirements - requirements checked by the provided ad hoc {@link PhenopacketValidator}s,
  • + *
  • semantic requirements - requirements checked by the provided {@link PhenopacketValidator}s.
  • *
*

- * The validation is performed in steps as outlined by the list above. Note that the data format validation must + * The validation is performed in the order as outlined above. Note that the data format validation must * pass in order for the latter steps to run. *

- * Use one of {@link Builder}s provided via static constructors (e.g. {@link #phenopacketBuilder()}) to build + * Use one of {@link JsonSchemaValidationWorkflowRunnerBuilder}s provided via static constructors (e.g. {@link #phenopacketBuilder()}) to build * the validation workflow. * * @param must be one of the three top-level elements of the Phenopacket schema: @@ -48,40 +46,43 @@ public class JsonSchemaValidationWorkflowRunner impl private final PhenopacketFormatConverter converter; private final JsonSchemaValidator baseValidator; private final Collection requirementValidators; + private final Collection> syntaxValidators; private final Collection> semanticValidators; private final List validatorInfos; /** - * @return a {@link Builder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating + * @return a {@link JsonSchemaValidationWorkflowRunnerBuilder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating * {@link PhenopacketOrBuilder}. */ - public static Builder phenopacketBuilder() { - return new ValidationWorkflowRunnerBuilder.PhenopacketWorkflowRunnerBuilder(); + public static JsonSchemaValidationWorkflowRunnerBuilder phenopacketBuilder() { + return new BaseValidationWorkflowRunnerBuilder.PhenopacketWorkflowRunnerBuilder(); } /** - * @return a {@link Builder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating + * @return a {@link JsonSchemaValidationWorkflowRunnerBuilder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating * {@link FamilyOrBuilder}. */ - public static Builder familyBuilder() { - return new ValidationWorkflowRunnerBuilder.FamilyWorkflowRunnerBuilder(); + public static JsonSchemaValidationWorkflowRunnerBuilder familyBuilder() { + return new BaseValidationWorkflowRunnerBuilder.FamilyWorkflowRunnerBuilder(); } /** - * @return a {@link Builder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating - * {@link CohortOrBuilder}. 
+ * @return a {@link JsonSchemaValidationWorkflowRunnerBuilder} for building a {@link JsonSchemaValidationWorkflowRunner} for validating + * {@link CohortOrBuilder} */ - public static Builder cohortBuilder() { - return new ValidationWorkflowRunnerBuilder.CohortWorkflowRunnerBuilder(); + public static JsonSchemaValidationWorkflowRunnerBuilder cohortBuilder() { + return new BaseValidationWorkflowRunnerBuilder.CohortWorkflowRunnerBuilder(); } JsonSchemaValidationWorkflowRunner(PhenopacketFormatConverter converter, JsonSchemaValidator baseValidator, Collection requirementValidators, + Collection> syntaxValidators, Collection> semanticValidators) { this.converter = Objects.requireNonNull(converter); this.baseValidator = Objects.requireNonNull(baseValidator); this.requirementValidators = Objects.requireNonNull(requirementValidators); + this.syntaxValidators = Objects.requireNonNull(syntaxValidators); this.semanticValidators = Objects.requireNonNull(semanticValidators); this.validatorInfos = summarizeValidatorInfos(baseValidator, requirementValidators, semanticValidators); } @@ -133,6 +134,12 @@ public ValidationResults validate(String json) { return wrapUpValidation(e, builder); } + try { + validateSyntax(json, builder); + } catch (ConversionException e) { + return wrapUpValidation(e, builder); + } + try { validateSemantic(json, builder); } catch (ConversionException e) { @@ -155,6 +162,8 @@ public ValidationResults validate(T item) { return wrapUpValidation(e, builder); } + validateSyntax(item, builder); + // No conversion necessary, hence no need to guard against the `ConversionException`. validateSemantic(item, builder); @@ -176,7 +185,7 @@ private String parseToString(byte[] payload) throws ConversionException { /** * Validate requirements using {@link #baseValidator} and all {@link #requirementValidators}. * - * @throws ConversionException if {@code json} cannot be mapped into {@link JsonNode}. 
+ * @throws ConversionException if {@code json} cannot be mapped into {@link JsonNode} */ private void validateRequirements(String json, ValidationResults.Builder builder) throws ConversionException { JsonNode jsonNode; @@ -194,10 +203,22 @@ private void validateRequirements(String json, ValidationResults.Builder builder } } + private void validateSyntax(String item, ValidationResults.Builder builder) throws ConversionException { + T component = converter.toItem(item); + + validateSyntax(component, builder); + } + + private void validateSyntax(T component, ValidationResults.Builder builder) { + for (PhenopacketValidator validator : syntaxValidators) { + builder.addResults(validator.validatorInfo(), validator.validate(component)); + } + } + /** * Validate semantic requirements using {@link #semanticValidators}. * - * @throws ConversionException if {@code item} cannot be mapped into {@link T}. + * @throws ConversionException if {@code item} cannot be mapped into {@link T} */ private void validateSemantic(String item, ValidationResults.Builder builder) throws ConversionException { T component = converter.toItem(item); @@ -219,56 +240,4 @@ private static ValidationResults wrapUpValidation(ConversionException e, Validat .build(); } - /** - * A builder for {@link JsonSchemaValidationWorkflowRunner}. - *

- * Build the {@link JsonSchemaValidationWorkflowRunner} by providing JSON schema documents - * either as {@link Path} or {@link URL}s, and {@link PhenopacketValidator}s for performing semantic validation. - * - * @param one of top-level elements of the Phenopacket schema. - */ - public static abstract class Builder { - - protected final List jsonSchemaUrls = new ArrayList<>(); - protected final List> semanticValidators = new ArrayList<>(); - - protected Builder() { - // private no-op - } - - public Builder addJsonSchema(Path path) throws MalformedURLException { - return addJsonSchema(path.toUri().toURL()); - } - - public Builder addJsonSchema(URL url) { - jsonSchemaUrls.add(url); - return this; - } - - public Builder addAllJsonSchemaPaths(List paths) throws MalformedURLException { - for (Path path : paths) { - jsonSchemaUrls.add(path.toUri().toURL()); - } - return this; - } - - public Builder addAllJsonSchemaUrls(List urls) { - jsonSchemaUrls.addAll(urls); - return this; - } - - public Builder addSemanticValidator(PhenopacketValidator semanticValidator) { - this.semanticValidators.add(semanticValidator); - return this; - } - - public Builder addAllSemanticValidators(List> semanticValidators) { - this.semanticValidators.addAll(semanticValidators); - return this; - } - - public abstract JsonSchemaValidationWorkflowRunner build(); - - } - } diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java new file mode 100644 index 00000000..a66a059a --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerBuilder.java @@ -0,0 +1,86 @@ +package org.phenopackets.phenopackettools.validator.jsonschema; + +import 
com.google.protobuf.MessageOrBuilder; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunnerBuilder; + +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +/** + * A builder for {@link JsonSchemaValidationWorkflowRunner}. + *

+ * Build the {@link JsonSchemaValidationWorkflowRunner} by providing JSON schema documents + * either as {@link Path} or {@link URL}s, and {@link PhenopacketValidator}s for performing semantic validation. + * + * @param one of top-level elements of the Phenopacket schema. + */ +public abstract class JsonSchemaValidationWorkflowRunnerBuilder extends ValidationWorkflowRunnerBuilder { + + protected final List jsonSchemaUrls = new ArrayList<>(); + + protected JsonSchemaValidationWorkflowRunnerBuilder() { + // private no-op + } + + /** + * Register a JSON schema present at a given {@code path} to be used as a syntax validator. The {@code path} + * will be interpreted as a {@link URL}. + * + * @param path path to the JSON schema document + * @return the builder + * @throws MalformedURLException if the {@code path} cannot be converted to a well-formatted {@link URL} + */ + public JsonSchemaValidationWorkflowRunnerBuilder addJsonSchema(Path path) throws MalformedURLException { + return addJsonSchema(path.toUri().toURL()); + } + + /** + * Register a JSON schema present at a given {@code url} to be used as a syntax validator. + * + * @param url url to the JSON schema document + * @return the builder + */ + public JsonSchemaValidationWorkflowRunnerBuilder addJsonSchema(URL url) { + jsonSchemaUrls.add(url); + return this; + } + + /** + * Add JSON schemas in bulk. + * + * @param paths an iterable of paths pointing to JSON schema documents + * @return the builder + * @see JsonSchemaValidationWorkflowRunnerBuilder#addJsonSchema(Path) + */ + public JsonSchemaValidationWorkflowRunnerBuilder addAllJsonSchemaPaths(Iterable paths) throws MalformedURLException { + for (Path path : paths) { + jsonSchemaUrls.add(path.toUri().toURL()); + } + return this; + } + + /** + * Add JSON schemas in bulk. 
+ * + * @param urls an iterable of urls pointing to JSON schema documents + * @return the builder + * @see JsonSchemaValidationWorkflowRunnerBuilder#addJsonSchema(URL) + */ + public JsonSchemaValidationWorkflowRunnerBuilder addAllJsonSchemaUrls(List urls) { + jsonSchemaUrls.addAll(urls); + return this; + } + + /** + * Finish building the {@link JsonSchemaValidationWorkflowRunner}. + * + * @return the runner + */ + @Override + public abstract JsonSchemaValidationWorkflowRunner build(); + +} From 40f2687d4137c6a9e6763ab29a6551dbce7240b3 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 18 Oct 2022 10:53:04 -0400 Subject: [PATCH 05/79] Update `vrs.json`. Signed-off-by: Daniel Danis --- .../phenopackets/phenopackettools/validator/jsonschema/vrs.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json index 555e2a55..ca2366be 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft/2019-09/schema#", + "$schema": "http://json-schema.org/draft-07/schema", "title": "GA4GH-VRS-Definitions", "type": "object", "definitions": { From 2baec166ca2ef40af0e376e044065da63e70065e Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Tue, 18 Oct 2022 11:53:47 -0400 Subject: [PATCH 06/79] Adding API url to README --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5fbeddb6..d76c0f43 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,5 @@ pfx-tools validate family ~/phenopacket-examples/families/*.json 
pfx-tools convert phenopacket.json ``` - -see this for VRS -- https://github.com/ga4gh/vrs/blob/76542a903b913110e67811885a8958625bbc3aae/schema/vrs.json -import it like vrsatile \ No newline at end of file +### API +A Javadoc description of the API of phenopacket-tools is available [here](https://javadoc.io/doc/org.phenopackets.phenopackettools). \ No newline at end of file From d38c9a7ad0c4af99a0d713d0d669aeddc5ea88ff Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 18 Oct 2022 17:05:09 -0400 Subject: [PATCH 07/79] Implement ancestry validation of HPO terms. Signed-off-by: Daniel Danis --- .../phenotype/HpoPhenotypeValidators.java | 106 ++++++++++++- .../AbstractHpoAncestryValidator.java | 143 ++++++++++++++++++ .../ancestry/CohortHpoAncestryValidator.java | 19 +++ .../ancestry/FamilyHpoAncestryValidator.java | 26 ++++ .../PhenopacketHpoAncestryValidator.java | 19 +++ .../core/phenotype/ancestry/package-info.java | 6 + .../core/phenotype/base/BaseHpoValidator.java | 19 +++ .../core/phenotype/base/package-info.java | 5 + .../AbstractHpoPhenotypeValidator.java} | 47 +++--- .../CohortHpoPhenotypeValidator.java | 6 +- .../FamilyHpoPhenotypeValidator.java | 6 +- .../PhenopacketHpoPhenotypeValidator.java | 6 +- .../core/phenotype/primary/package-info.java | 7 + .../phenotype/AncestryHpoValidatorTest.java | 118 +++++++++++++++ ... 
=> PrimaryHpoPhenotypeValidatorTest.java} | 19 ++- 15 files changed, 503 insertions(+), 49 deletions(-) create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/CohortHpoAncestryValidator.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/FamilyHpoAncestryValidator.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/PhenopacketHpoAncestryValidator.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/package-info.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/package-info.java rename phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/{BaseHpoPhenotypeValidator.java => primary/AbstractHpoPhenotypeValidator.java} (57%) rename phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/{ => primary}/CohortHpoPhenotypeValidator.java (85%) rename phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/{ => primary}/FamilyHpoPhenotypeValidator.java (89%) rename phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/{ => primary}/PhenopacketHpoPhenotypeValidator.java (83%) create mode 100644 
phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/package-info.java create mode 100644 phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java rename phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/{HpoPhenotypeValidatorTest.java => PrimaryHpoPhenotypeValidatorTest.java} (95%) diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java index 1dbd6f54..18271c6b 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java @@ -2,6 +2,12 @@ import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.CohortHpoAncestryValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.FamilyHpoAncestryValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.PhenopacketHpoAncestryValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.primary.CohortHpoPhenotypeValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.primary.FamilyHpoPhenotypeValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.primary.PhenopacketHpoPhenotypeValidator; import org.phenopackets.schema.v2.*; /** @@ -17,27 +23,117 @@ private HpoPhenotypeValidators() { * Get {@link PhenopacketValidator} to 
validate {@link Phenopacket} using provided {@link Ontology}. * * @param hpo HPO ontology + * @deprecated use {@link Primary#phenopacketHpoPhenotypeValidator(Ontology)} instead */ + // TODO - remove prior v1 + @Deprecated(forRemoval = true) public static PhenopacketValidator phenopacketHpoPhenotypeValidator(Ontology hpo) { - return new PhenopacketHpoPhenotypeValidator(hpo); + return Primary.phenopacketHpoPhenotypeValidator(hpo); } /** - * Get {@link PhenopacketValidator} to validate {@link Family} using provided {@link Ontology}. + * Get {@link PhenopacketValidator} for validate {@link Family} using provided {@link Ontology}. * * @param hpo HPO ontology + * @deprecated use {@link Primary#familyHpoPhenotypeValidator(Ontology)} instead */ + // TODO - remove prior v1 + @Deprecated(forRemoval = true) public static PhenopacketValidator familyHpoPhenotypeValidator(Ontology hpo) { - return new FamilyHpoPhenotypeValidator(hpo); + return Primary.familyHpoPhenotypeValidator(hpo); } /** - * Get {@link PhenopacketValidator} to validate {@link Cohort} using provided {@link Ontology}. + * Get {@link PhenopacketValidator} for performing primary validation {@link Cohort} using provided {@link Ontology}, + * as described in {@link org.phenopackets.phenopackettools.validator.core.phenotype.primary.AbstractHpoPhenotypeValidator}. 
* * @param hpo HPO ontology + * @deprecated use {@link Primary#cohortHpoPhenotypeValidator(Ontology)} instead */ + // TODO - remove prior v1 + @Deprecated(forRemoval = true) public static PhenopacketValidator cohortHpoPhenotypeValidator(Ontology hpo) { - return new CohortHpoPhenotypeValidator(hpo); + return Primary.cohortHpoPhenotypeValidator(hpo); + } + + /** + * A static factory class for providing {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}s + * that check if HPO terms of the Phenopacket schema elements are present in + * a given {@link org.monarchinitiative.phenol.ontology.data.Ontology} and if the terms are non-obsolete. + */ + public static class Primary { + /** + * Get {@link PhenopacketValidator} to validate {@link Phenopacket} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator phenopacketHpoPhenotypeValidator(Ontology hpo) { + return new PhenopacketHpoPhenotypeValidator(hpo); + } + + /** + * Get {@link PhenopacketValidator} for validate {@link Family} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator familyHpoPhenotypeValidator(Ontology hpo) { + return new FamilyHpoPhenotypeValidator(hpo); + } + + /** + * Get {@link PhenopacketValidator} for performing primary validation {@link Cohort} using provided {@link Ontology}, + * as described in {@link org.phenopackets.phenopackettools.validator.core.phenotype.primary.AbstractHpoPhenotypeValidator}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator cohortHpoPhenotypeValidator(Ontology hpo) { + return new CohortHpoPhenotypeValidator(hpo); + } + } + + /** + * A static factory class for providing validators for pointing out violations of the annotation propagation rule. + *

+ * The validator checks observed and excluded phenotype terms. The observed terms are checked for the presence of + * an observed or an excluded ancestor, and the presence of such an ancestor is reported as an error. + * For instance, Abnormality of finger or "NOT" Abnormality of finger must not be present + * in a patient annotated with Arachnodactyly. The most specific term (Arachnodactyly) must be used. + *

+ * For the excluded terms, the validator checks for presence of an excluded children. Here, the least specific term + * must be used. For instance, "NOT" Arachnodactyly must not be present in a patient annotated + * with "NOT" Abnormality of finger. Only the "NOT" Abnormality of finger must be used. + */ + public static class Ancestry { + + private Ancestry() { + } + + /** + * Get {@link PhenopacketValidator} to validate ancestry {@link Phenopacket} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator phenopacketHpoAncestryValidator(Ontology hpo) { + return new PhenopacketHpoAncestryValidator(hpo); + } + + /** + * Get {@link PhenopacketValidator} to validate ancestry {@link Family} using provided {@link Ontology}. + * + * @param hpo HPO ontology + */ + public static PhenopacketValidator familyHpoAncestryValidator(Ontology hpo) { + return new FamilyHpoAncestryValidator(hpo); + } + + /** + * Get {@link PhenopacketValidator} to validate ancestry {@link Cohort} using provided {@link Ontology}. 
+ * + * @param hpo HPO ontology + */ + public static PhenopacketValidator cohortHpoAncestryValidator(Ontology hpo) { + return new CohortHpoAncestryValidator(hpo); + } } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java new file mode 100644 index 00000000..c5b9c887 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java @@ -0,0 +1,143 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; + +import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.Term; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.validator.core.ValidationResult; +import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; +import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * A class for pointing out violations of the annotation propagation rule. + *

+ * The validator checks observed and excluded phenotype terms. The observed terms are checked for the presence of + * an observed or an excluded ancestor, and the presence of such an ancestor is reported as an error. + * For instance, Abnormality of finger or "NOT" Abnormality of finger must not be present + * in a patient annotated with Arachnodactyly. The most specific term (Arachnodactyly) must be used. + *

+ * For the excluded terms, the validator checks for presence of an excluded children. Here, the least specific term + * must be used. For instance, "NOT" Arachnodactyly must not be present in a patient annotated + * with "NOT" Abnormality of finger. Only the "NOT" Abnormality of finger must be used. + */ +public abstract class AbstractHpoAncestryValidator extends BaseHpoValidator { + + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractHpoAncestryValidator.class); + + private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( + "HpoAncestryValidator", + "HPO ancestry phenotypic feature validator", + "Validate that phenopacket does not contain an HPO term and its ancestor based on the provided HPO"); + private static final String APR_VIOLATION = "Violation of the annotation propagation rule"; + private static final String UNKNOWN = "UNKNOWN_NAME"; + + AbstractHpoAncestryValidator(Ontology hpo) { + super(hpo); + } + + @Override + public ValidatorInfo validatorInfo() { + return VALIDATOR_INFO; + } + + @Override + public List validate(T component) { + return extractPhenopackets(component) + .flatMap(pp -> validatePhenopacketPhenotypicFeatures(pp.getId(), pp.getPhenotypicFeaturesList())) + .toList(); + } + + protected abstract Stream extractPhenopackets(T message); + + private Stream validatePhenopacketPhenotypicFeatures(String id, List phenotypicFeatures) { + Map> featuresByExclusion = phenotypicFeatures.stream() + .map(toMaybeObservedTermId()) + .flatMap(Optional::stream) + // Use `partitioningBy` instead of `groupingBy` to ensure the map contains keys + // for both `true` and `false`. Then extract `TermId` and collect in a `Set`. + .collect(Collectors.partitioningBy(MaybeExcludedTermId::excluded, + Collectors.mapping(MaybeExcludedTermId::termId, Collectors.toSet()))); + + + Stream.Builder results = Stream.builder(); + + // Check that the component does not contain both observed term and its ancestor. 
+ Set allObserved = featuresByExclusion.get(false); + Set allExcluded = featuresByExclusion.get(true); + for (TermId observed : allObserved) { + for (TermId ancestor : OntologyAlgorithm.getAncestorTerms(hpo, observed, false)) { + if (allObserved.contains(ancestor)) + results.add(constructResultForAnObservedTerm(id, observed, ancestor, false)); + if (allExcluded.contains(ancestor)) + results.add(constructResultForAnObservedTerm(id, observed, ancestor, true)); + } + } + + // Check that the component does not have negated descendant + for (TermId excluded : allExcluded) { + for (TermId child : OntologyAlgorithm.getDescendents(hpo, excluded)) { + if (child.equals(excluded)) + // skip the parent term + continue; + if (allExcluded.contains(child)) + results.add(constructResultForAnExcludedTerm(id, excluded, child)); + } + } + + return results.build(); + } + + private static Function> toMaybeObservedTermId() { + return pf -> { + TermId termId; + try { + termId = TermId.of(pf.getType().getId()); + } catch (PhenolRuntimeException e) { + LOGGER.warn("Skipping ancestry validation of malformed term ID {}", pf.getType().getId()); + return Optional.empty(); + } + return Optional.of(new MaybeExcludedTermId(termId, pf.getExcluded())); + }; + } + + private ValidationResult constructResultForAnObservedTerm(String id, TermId observedId, TermId ancestorId, boolean ancestorIsExcluded) { + Term observedTerm = hpo.getTermMap().get(observedId); + String observedTermName = observedTerm == null ? UNKNOWN : observedTerm.getName(); + Term ancestorTerm = hpo.getTermMap().get(ancestorId); + String ancestorTermName = ancestorTerm == null ? 
UNKNOWN : ancestorTerm.getName(); + String message; + if (ancestorIsExcluded) + message = "Phenotypic features of %s must not contain both an observed term (%s, %s) and an excluded ancestor (%s, %s)".formatted( + id, observedTermName, observedId.getValue(), ancestorTermName, ancestorId.getValue()); + else + message = "Phenotypic features of %s must not contain both an observed term (%s, %s) and an observed ancestor (%s, %s)".formatted( + id, observedTermName, observedId.getValue(), ancestorTermName, ancestorId.getValue()); + + return ValidationResult.error(VALIDATOR_INFO, APR_VIOLATION, message); + } + + private ValidationResult constructResultForAnExcludedTerm(String id, TermId excluded, TermId child) { + Term excludedTerm = hpo.getTermMap().get(excluded); + String excludedTermName = excludedTerm == null ? UNKNOWN : excludedTerm.getName(); + Term childTerm = hpo.getTermMap().get(child); + String childTermName = childTerm == null ? UNKNOWN : childTerm.getName(); + String message = "Phenotypic features of %s must not contain both an excluded term (%s, %s) and an excluded child (%s, %s)".formatted( + id, excludedTermName, excluded.getValue(), childTermName, child.getValue()); + + return ValidationResult.error(VALIDATOR_INFO, APR_VIOLATION, message); + } + + private record MaybeExcludedTermId(TermId termId, boolean excluded) { + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/CohortHpoAncestryValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/CohortHpoAncestryValidator.java new file mode 100644 index 00000000..c68a3517 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/CohortHpoAncestryValidator.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; + +import 
org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.schema.v2.CohortOrBuilder; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.stream.Stream; + +public class CohortHpoAncestryValidator extends AbstractHpoAncestryValidator { + + public CohortHpoAncestryValidator(Ontology hpo) { + super(hpo); + } + + @Override + protected Stream extractPhenopackets(CohortOrBuilder message) { + return message.getMembersList().stream(); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/FamilyHpoAncestryValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/FamilyHpoAncestryValidator.java new file mode 100644 index 00000000..66ba7e88 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/FamilyHpoAncestryValidator.java @@ -0,0 +1,26 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.schema.v2.FamilyOrBuilder; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.stream.Stream; + +public class FamilyHpoAncestryValidator extends AbstractHpoAncestryValidator { + + public FamilyHpoAncestryValidator(Ontology hpo) { + super(hpo); + } + + @Override + protected Stream extractPhenopackets(FamilyOrBuilder message) { + Stream.Builder builder = Stream.builder(); + builder.accept(message.getProband()); + + for (Phenopacket relative : message.getRelativesList()) + builder.add(relative); + + return builder.build(); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/PhenopacketHpoAncestryValidator.java 
b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/PhenopacketHpoAncestryValidator.java new file mode 100644 index 00000000..b23ca49a --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/PhenopacketHpoAncestryValidator.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.stream.Stream; + +public class PhenopacketHpoAncestryValidator extends AbstractHpoAncestryValidator { + + public PhenopacketHpoAncestryValidator(Ontology hpo) { + super(hpo); + } + + @Override + protected Stream extractPhenopackets(PhenopacketOrBuilder message) { + return Stream.of(message); + } + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/package-info.java new file mode 100644 index 00000000..234f7f14 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/package-info.java @@ -0,0 +1,6 @@ +/** + * The package contains validators that point out violations of the annotation propagation rule. 
+ * + * @see org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.Ancestry + */ +package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java new file mode 100644 index 00000000..a908efb6 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.base; + +import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; + +import java.util.Objects; + +public abstract class BaseHpoValidator implements PhenopacketValidator { + + protected final Ontology hpo; + protected final String hpoVersion; + + protected BaseHpoValidator(Ontology hpo) { + this.hpo = Objects.requireNonNull(hpo); + // TODO - can be replaced by this.hpo.version() in the most recent phenol versions. 
+ this.hpoVersion = this.hpo.getMetaInfo().getOrDefault("data-version", "HPO"); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/package-info.java new file mode 100644 index 00000000..eb9cb4de --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/package-info.java @@ -0,0 +1,5 @@ +/** + * Shared bits of all {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}s + * that use HPO {@link org.monarchinitiative.phenol.ontology.data.Ontology} in validation. + */ +package org.phenopackets.phenopackettools.validator.core.phenotype.base; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/BaseHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java similarity index 57% rename from phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/BaseHpoPhenotypeValidator.java rename to phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java index dad25e54..141dc0ef 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/BaseHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java @@ -1,16 +1,16 @@ -package org.phenopackets.phenopackettools.validator.core.phenotype; +package 
org.phenopackets.phenopackettools.validator.core.phenotype.primary; import com.google.protobuf.MessageOrBuilder; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenol.ontology.data.TermId; import org.phenopackets.phenopackettools.validator.core.*; +import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; import org.phenopackets.schema.v2.core.PhenotypicFeature; -import java.util.Objects; import java.util.stream.Stream; -abstract class BaseHpoPhenotypeValidator implements PhenopacketValidator { +public abstract class AbstractHpoPhenotypeValidator extends BaseHpoValidator { private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( "HpoPhenotypeValidator", @@ -19,12 +19,8 @@ abstract class BaseHpoPhenotypeValidator implements private static final String INVALID_TERM_ID = "Invalid TermId"; private static final String OBSOLETED_TERM_ID = "Obsoleted TermId"; - private final Ontology hpo; - private final String hpoVersion; - - public BaseHpoPhenotypeValidator(Ontology hpo) { - this.hpo = Objects.requireNonNull(hpo); - this.hpoVersion = this.hpo.getMetaInfo().getOrDefault("data-version", "HPO"); + public AbstractHpoPhenotypeValidator(Ontology hpo) { + super(hpo); } @Override @@ -43,23 +39,24 @@ protected Stream checkPhenotypeFeature(String indivi ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) ); } + if (termId.getPrefix().equals("HP")) { + // Check if the HPO contains the term. + if (!hpo.containsTerm(termId)) { + String msg = "%s in '%s' not found in %s".formatted(termId.getValue(), individualId, hpoVersion); + return Stream.of( + ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) + ); + } - // Check if the HPO contains the term. 
- if (!hpo.containsTerm(termId)) { - String msg = "%s in '%s' not found in %s".formatted(termId.getValue(), individualId, hpoVersion); - return Stream.of( - ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) - ); - } - - // Check if the `termId` is a primary ID. // If not, this is a warning. - TermId primaryId = hpo.getPrimaryTermId(termId); - if (!primaryId.equals(termId)) { - String msg = "Using obsoleted id (%s) instead of current primary id (%s) in '%s'" - .formatted(termId.getValue(), primaryId.getValue(), individualId); - return Stream.of( - ValidationResult.warning(VALIDATOR_INFO, OBSOLETED_TERM_ID, msg) - ); + // Check if the `termId` is a primary ID. // If not, this is a warning. + TermId primaryId = hpo.getPrimaryTermId(termId); + if (!primaryId.equals(termId)) { + String msg = "Using obsoleted id (%s) instead of current primary id (%s) in '%s'" + .formatted(termId.getValue(), primaryId.getValue(), individualId); + return Stream.of( + ValidationResult.warning(VALIDATOR_INFO, OBSOLETED_TERM_ID, msg) + ); + } } return Stream.empty(); diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/CohortHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java similarity index 85% rename from phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/CohortHpoPhenotypeValidator.java rename to phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java index f72bbc70..c396a0c1 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/CohortHpoPhenotypeValidator.java +++ 
b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.validator.core.phenotype; +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.ValidationResult; @@ -10,9 +10,9 @@ import java.util.ArrayList; import java.util.List; -class CohortHpoPhenotypeValidator extends BaseHpoPhenotypeValidator { +public class CohortHpoPhenotypeValidator extends AbstractHpoPhenotypeValidator { - CohortHpoPhenotypeValidator(Ontology hpo) { + public CohortHpoPhenotypeValidator(Ontology hpo) { super(hpo); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/FamilyHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java similarity index 89% rename from phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/FamilyHpoPhenotypeValidator.java rename to phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java index 1512f25b..ad65f9b2 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/FamilyHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.validator.core.phenotype; +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; import org.monarchinitiative.phenol.ontology.data.Ontology; import 
org.phenopackets.phenopackettools.validator.core.ValidationResult; @@ -10,9 +10,9 @@ import java.util.ArrayList; import java.util.List; -class FamilyHpoPhenotypeValidator extends BaseHpoPhenotypeValidator { +public class FamilyHpoPhenotypeValidator extends AbstractHpoPhenotypeValidator { - FamilyHpoPhenotypeValidator(Ontology hpo) { + public FamilyHpoPhenotypeValidator(Ontology hpo) { super(hpo); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/PhenopacketHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java similarity index 83% rename from phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/PhenopacketHpoPhenotypeValidator.java rename to phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java index c30b2b45..96fda6c6 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/PhenopacketHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.validator.core.phenotype; +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.ValidationResult; @@ -9,9 +9,9 @@ import java.util.ArrayList; import java.util.List; -class PhenopacketHpoPhenotypeValidator extends BaseHpoPhenotypeValidator { +public class PhenopacketHpoPhenotypeValidator extends AbstractHpoPhenotypeValidator { - PhenopacketHpoPhenotypeValidator(Ontology hpo) { + public 
PhenopacketHpoPhenotypeValidator(Ontology hpo) { super(hpo); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/package-info.java new file mode 100644 index 00000000..c233fd38 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/package-info.java @@ -0,0 +1,7 @@ +/** + * The package of {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}s that perform + * primary validation of HPO terms. + * + * @see org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.Primary + */ +package org.phenopackets.phenopackettools.validator.core.phenotype.primary; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java new file mode 100644 index 00000000..091004bc --- /dev/null +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java @@ -0,0 +1,118 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.phenopackettools.validator.core.TestData; +import org.phenopackets.phenopackettools.validator.core.ValidationLevel; +import org.phenopackets.phenopackettools.validator.core.ValidationResult; +import 
org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.List; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + +public class AncestryHpoValidatorTest { + + private static final Ontology HPO = TestData.HPO; + + @Nested + public class PhenopacketTest { + + private PhenopacketValidator validator; + + @BeforeEach + public void setUp() { + validator = HpoPhenotypeValidators.Ancestry.phenopacketHpoAncestryValidator(HPO); + } + + @Test + public void testValidInput() { + // Has some Abnormality of finger but no Arachnodactyly. + Phenopacket pp = createPhenopacket( + createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true) + ).build(); + + List results = validator.validate(pp); + + assertThat(results, is(empty())); + } + + @Test + public void testFailsIfTermAndAncestorIsObserved() { + // Has some Abnormality of finger and Arachnodactyly. Only Arachnodactyly should be present. 
+ Phenopacket pp = createPhenopacket( + createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) + ).build(); + + List results = validator.validate(pp); + + assertThat(results, hasSize(1)); + ValidationResult result = results.get(0); + assertThat(result.validatorInfo(), equalTo(validator.validatorInfo())); + assertThat(result.level(), equalTo(ValidationLevel.ERROR)); + assertThat(result.category(), equalTo("Violation of the annotation propagation rule")); + assertThat(result.message(), equalTo("Phenotypic features of example-phenopacket must not contain both an observed term (Arachnodactyly, HP:0001166) and an observed ancestor (Abnormality of finger, HP:0001167)")); + } + + @Test + public void testFailsIfTermAndAncestorIsExcluded() { + // Has neither Abnormality of finger nor Arachnodactyly. Only Abnormality of finger should be present. + Phenopacket pp = createPhenopacket( + createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true) + ).build(); + + List results = validator.validate(pp); + + assertThat(results, hasSize(1)); + ValidationResult result = results.get(0); + assertThat(result.level(), equalTo(ValidationLevel.ERROR)); + assertThat(result.category(), equalTo("Violation of the annotation propagation rule")); + assertThat(result.message(), equalTo("Phenotypic features of example-phenopacket must not contain both an excluded term (Abnormality of finger, HP:0001167) and an excluded child (Arachnodactyly, HP:0001166)")); + } + + @Test + public void testFailsIfTermIsPresentAndAncestorIsExcluded() { + // Has neither Abnormality of finger nor Arachnodactyly. Only Abnormality of finger should be present. 
+ Phenopacket pp = createPhenopacket( + createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) + ).build(); + + List results = validator.validate(pp); + + assertThat(results, hasSize(1)); + ValidationResult result = results.get(0); + assertThat(result.level(), equalTo(ValidationLevel.ERROR)); + assertThat(result.category(), equalTo("Violation of the annotation propagation rule")); + assertThat(result.message(), equalTo("Phenotypic features of example-phenopacket must not contain both an observed term (Arachnodactyly, HP:0001166) and an excluded ancestor (Abnormality of finger, HP:0001167)")); + } + + private static Phenopacket.Builder createPhenopacket(PhenotypicFeature excludedArachnodactyly, PhenotypicFeature observedAbnormalityOfFinger) { + return Phenopacket.newBuilder() + .setId("example-phenopacket") + .addPhenotypicFeatures(excludedArachnodactyly) + .addPhenotypicFeatures(observedAbnormalityOfFinger); + } + + private static PhenotypicFeature createPhenotypicFeature(String id, String label, boolean excluded) { + return PhenotypicFeature.newBuilder() + .setType(OntologyClass.newBuilder() + .setId(id) + .setLabel(label) + .build()) + .setExcluded(excluded) + .build(); + } + } + +} diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java similarity index 95% rename from phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidatorTest.java rename to phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java index 822167ea..a9ba8567 100644 --- 
a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidatorTest.java +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java @@ -5,30 +5,29 @@ import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; import org.phenopackets.phenopackettools.validator.core.TestData; import org.phenopackets.phenopackettools.validator.core.ValidationLevel; import org.phenopackets.phenopackettools.validator.core.ValidationResult; -import org.phenopackets.schema.v2.Cohort; -import org.phenopackets.schema.v2.Family; -import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.*; import java.util.List; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; -public class HpoPhenotypeValidatorTest { +public class PrimaryHpoPhenotypeValidatorTest { private static final Ontology HPO = TestData.HPO; @Nested public class PhenopacketTest { - private PhenopacketHpoPhenotypeValidator validator; + private PhenopacketValidator validator; @BeforeEach public void setUp() { - validator = new PhenopacketHpoPhenotypeValidator(HPO); + validator = HpoPhenotypeValidators.Primary.phenopacketHpoPhenotypeValidator(HPO); } @Test @@ -152,11 +151,11 @@ public void testMistypedTermId() throws Exception { */ @Nested public class FamilyTest { - private FamilyHpoPhenotypeValidator validator; + private PhenopacketValidator validator; @BeforeEach public void setUp() { - validator = new FamilyHpoPhenotypeValidator(HPO); + validator = HpoPhenotypeValidators.Primary.familyHpoPhenotypeValidator(HPO); } @Test @@ -306,11 +305,11 @@ public void testInvalidIdInRelative() throws Exception { @Nested public class CohortTest { - private 
CohortHpoPhenotypeValidator validator; + private PhenopacketValidator validator; @BeforeEach public void setUp() { - validator = new CohortHpoPhenotypeValidator(HPO); + validator = HpoPhenotypeValidators.Primary.cohortHpoPhenotypeValidator(HPO); } @Test From 52178b388edcb24f7e33cd03f87872410ce5748f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 18 Oct 2022 20:28:16 -0400 Subject: [PATCH 08/79] Implement Cohort and Family tests of the HPO "ancestry" validation functionality. Signed-off-by: Daniel Danis --- .../phenotype/AncestryHpoValidatorTest.java | 110 +++++++++++++++--- 1 file changed, 93 insertions(+), 17 deletions(-) diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java index 091004bc..8569bb1e 100644 --- a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java @@ -8,11 +8,11 @@ import org.phenopackets.phenopackettools.validator.core.TestData; import org.phenopackets.phenopackettools.validator.core.ValidationLevel; import org.phenopackets.phenopackettools.validator.core.ValidationResult; -import org.phenopackets.schema.v2.Phenopacket; -import org.phenopackets.schema.v2.PhenopacketOrBuilder; +import org.phenopackets.schema.v2.*; import org.phenopackets.schema.v2.core.OntologyClass; import org.phenopackets.schema.v2.core.PhenotypicFeature; +import java.util.Arrays; import java.util.List; import static org.hamcrest.MatcherAssert.assertThat; @@ -36,7 +36,7 @@ public void setUp() { public void testValidInput() { // Has some Abnormality of finger but no Arachnodactyly. 
Phenopacket pp = createPhenopacket( - createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + "example-phenopacket", createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), createPhenotypicFeature("HP:0001166", "Arachnodactyly", true) ).build(); @@ -49,7 +49,7 @@ public void testValidInput() { public void testFailsIfTermAndAncestorIsObserved() { // Has some Abnormality of finger and Arachnodactyly. Only Arachnodactyly should be present. Phenopacket pp = createPhenopacket( - createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + "example-phenopacket", createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) ).build(); @@ -67,7 +67,7 @@ public void testFailsIfTermAndAncestorIsObserved() { public void testFailsIfTermAndAncestorIsExcluded() { // Has neither Abnormality of finger nor Arachnodactyly. Only Abnormality of finger should be present. Phenopacket pp = createPhenopacket( - createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), + "example-phenopacket", createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), createPhenotypicFeature("HP:0001166", "Arachnodactyly", true) ).build(); @@ -84,7 +84,7 @@ public void testFailsIfTermAndAncestorIsExcluded() { public void testFailsIfTermIsPresentAndAncestorIsExcluded() { // Has neither Abnormality of finger nor Arachnodactyly. Only Abnormality of finger should be present. 
Phenopacket pp = createPhenopacket( - createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), + "example-phenopacket", createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) ).build(); @@ -96,23 +96,99 @@ public void testFailsIfTermIsPresentAndAncestorIsExcluded() { assertThat(result.category(), equalTo("Violation of the annotation propagation rule")); assertThat(result.message(), equalTo("Phenotypic features of example-phenopacket must not contain both an observed term (Arachnodactyly, HP:0001166) and an excluded ancestor (Abnormality of finger, HP:0001167)")); } + } + + /** + * White-box testing - we know that the {@link PhenotypicFeature} is an attribute of a {@link Phenopacket}, so we + * test the validation logic extensively in {@link PhenopacketTest}. The {@link FamilyTest} test suite ensures + * there are not errors in valid input. + */ + @Nested + public class FamilyTest { + + private PhenopacketValidator validator; + + @BeforeEach + public void setUp() { + validator = HpoPhenotypeValidators.Ancestry.familyHpoAncestryValidator(HPO); + } + + @Test + public void testValidInput() { + Family family = Family.newBuilder() + .setProband(createPhenopacket("example-phenopacket", + createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) + .build()) + .addRelatives(createPhenopacket("dad-phenopacket", + createPhenotypicFeature("HP:0001238", "Slender finger", false), + createPhenotypicFeature("HP:0100807", "Long fingers", false)) + .build()) + .addRelatives(createPhenopacket("mom-phenopacket", + createPhenotypicFeature("HP:0001238", "Slender finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) + .build()) + .build(); + + List results = validator.validate(family); + + assertThat(results, is(empty())); + } + } + + /** + * White-box testing (same as in {@link FamilyTest}) - 
we know that the {@link PhenotypicFeature} + * is an attribute of a {@link Phenopacket}, so we test the validation logic extensively + * in {@link PhenopacketTest}. The {@link CohortTest} test suite ensures there are not errors in valid input. + */ + @Nested + public class CohortTest { + + private PhenopacketValidator validator; - private static Phenopacket.Builder createPhenopacket(PhenotypicFeature excludedArachnodactyly, PhenotypicFeature observedAbnormalityOfFinger) { - return Phenopacket.newBuilder() - .setId("example-phenopacket") - .addPhenotypicFeatures(excludedArachnodactyly) - .addPhenotypicFeatures(observedAbnormalityOfFinger); + @BeforeEach + public void setUp() { + validator = HpoPhenotypeValidators.Ancestry.cohortHpoAncestryValidator(HPO); } - private static PhenotypicFeature createPhenotypicFeature(String id, String label, boolean excluded) { - return PhenotypicFeature.newBuilder() - .setType(OntologyClass.newBuilder() - .setId(id) - .setLabel(label) + @Test + public void testValidInput() { + Cohort cohort = Cohort.newBuilder() + .addMembers(createPhenopacket("joe-phenopacket", + createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) + .build()) + .addMembers(createPhenopacket("jim-phenopacket", + createPhenotypicFeature("HP:0001238", "Slender finger", false), + createPhenotypicFeature("HP:0100807", "Long fingers", false)) + .build()) + .addMembers(createPhenopacket("jane-phenopacket", + createPhenotypicFeature("HP:0001238", "Slender finger", false), + createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) .build()) - .setExcluded(excluded) .build(); + + List results = validator.validate(cohort); + + assertThat(results, is(empty())); } } + private static Phenopacket.Builder createPhenopacket(String phenopacketId, + PhenotypicFeature... 
features) { + return Phenopacket.newBuilder() + .setId(phenopacketId) + .addAllPhenotypicFeatures(Arrays.asList(features)); + } + + private static PhenotypicFeature createPhenotypicFeature(String id, String label, boolean excluded) { + return PhenotypicFeature.newBuilder() + .setType(OntologyClass.newBuilder() + .setId(id) + .setLabel(label) + .build()) + .setExcluded(excluded) + .build(); + } + } From 91d0256c2933a647e64489a2ed23d5bbbbdfbab5 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 18 Oct 2022 20:29:50 -0400 Subject: [PATCH 09/79] Stop using the deprecated API Signed-off-by: Daniel Danis --- .../phenopackettools/command/ValidateCommand.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java index 50f25322..ce2fd647 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java @@ -143,15 +143,15 @@ private List> configureSema // This method requires an appropriate combination of `T` and `element`, as described in Javadoc. // We suppress warning and perform an unchecked cast here, assuming `T` and `element` are appropriate. // The app will crash and burn if this is not the case. 
- PhenopacketValidator validator = switch (inputSection.element) { + PhenopacketValidator primary = switch (inputSection.element) { case PHENOPACKET -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.phenopacketHpoPhenotypeValidator(hpo); + (PhenopacketValidator) HpoPhenotypeValidators.Primary.phenopacketHpoPhenotypeValidator(hpo); case FAMILY -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.familyHpoPhenotypeValidator(hpo); + (PhenopacketValidator) HpoPhenotypeValidators.Primary.familyHpoPhenotypeValidator(hpo); case COHORT -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.cohortHpoPhenotypeValidator(hpo); + (PhenopacketValidator) HpoPhenotypeValidators.Primary.cohortHpoPhenotypeValidator(hpo); }; - validators.add(validator); + validators.add(primary); } LOGGER.debug("Configured {} semantic validator(s)", validators.size()); From 7c979ed60c33c6bac7222094aae716bbd11aba1d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 18 Oct 2022 20:33:18 -0400 Subject: [PATCH 10/79] Use the ancestry validator when HPO is provided. Signed-off-by: Daniel Danis --- .../command/ValidateCommand.java | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java index ce2fd647..86a36979 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java @@ -143,15 +143,26 @@ private List> configureSema // This method requires an appropriate combination of `T` and `element`, as described in Javadoc. // We suppress warning and perform an unchecked cast here, assuming `T` and `element` are appropriate. 
// The app will crash and burn if this is not the case. - PhenopacketValidator primary = switch (inputSection.element) { - case PHENOPACKET -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.Primary.phenopacketHpoPhenotypeValidator(hpo); - case FAMILY -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.Primary.familyHpoPhenotypeValidator(hpo); - case COHORT -> //noinspection unchecked - (PhenopacketValidator) HpoPhenotypeValidators.Primary.cohortHpoPhenotypeValidator(hpo); + switch (inputSection.element) { + case PHENOPACKET -> { + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Primary.phenopacketHpoPhenotypeValidator(hpo)); + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Ancestry.phenopacketHpoAncestryValidator(hpo)); + } + case FAMILY -> { + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Primary.familyHpoPhenotypeValidator(hpo)); + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Ancestry.familyHpoAncestryValidator(hpo)); + } + case COHORT -> { + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Primary.cohortHpoPhenotypeValidator(hpo)); + //noinspection unchecked + validators.add((PhenopacketValidator) HpoPhenotypeValidators.Ancestry.cohortHpoAncestryValidator(hpo)); + } }; - validators.add(primary); } LOGGER.debug("Configured {} semantic validator(s)", validators.size()); From f36b9e5ef7c1a376e2a1492f15c0c51bd8ae9e7a Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Wed, 19 Oct 2022 15:49:39 -0400 Subject: [PATCH 11/79] revising our examples --- .../examples/BethlehamMyopathy.java | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java index d7bb1d11..587434c0 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java @@ -1,10 +1,13 @@ package org.phenopackets.phenopackettools.examples; +import org.ga4gh.vrsatile.v1.GeneDescriptor; +import org.ga4gh.vrsatile.v1.VcfRecord; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; import org.phenopackets.phenopackettools.builder.constants.Status; import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.GenomicInterpretation; import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; @@ -24,17 +27,9 @@ public BethlehamMyopathy() { .addResource(Resources.genoVersion("2020-03-08")) .addExternalReference(authorAssertion.getReference()) .build(); - var variationDescriptor = - VariationDescriptorBuilder.builder("variant id") - .heterozygous() - .hgvs("NM_001848.2:c.877G>A") - .build(); - var col6a1VariantInterpretation = - VariantInterpretationBuilder.of(variationDescriptor, Status.pathogenic()); - var genomicInterpretation = - GenomicInterpretationBuilder.builder(INTERPRETATION_ID) - .causative() - .variantInterpretation(col6a1VariantInterpretation).build(); + + var genomicInterpretation = COL6A1variant(); + var diagnosis = DiagnosisBuilder.builder(bethlehamMyopathy).addGenomicInterpretation(genomicInterpretation).build(); var interpretation = InterpretationBuilder.builder(INTERPRETATION_ID).completed(diagnosis); var ventricularSeptalDefect = @@ -100,6 +95,22 @@ public BethlehamMyopathy() { .build(); } + + private GenomicInterpretation COL6A1variant() { + var variationDescriptor = + VariationDescriptorBuilder.builder("variant id") + .heterozygous() 
+ .hgvs("NM_001848.2:c.877G>A") + .geneContext(GeneDescriptorBuilder.of("HGNC:2211", "COL6A1")) + .vcfHg38("chr21",45989626, "G","A") + .build(); + var col6a1VariantInterpretation = + VariantInterpretationBuilder.of(variationDescriptor, Status.pathogenic()); + return GenomicInterpretationBuilder.builder(INTERPRETATION_ID) + .causative() + .variantInterpretation(col6a1VariantInterpretation).build(); + } + @Override public Phenopacket getPhenopacket() { return phenopacket; From e12b33c5fdd35b2b08ef829441c8e055b5fe461d Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Wed, 19 Oct 2022 17:09:25 -0400 Subject: [PATCH 12/79] Adding HGNC Resource builder --- .../phenopackettools/builder/builders/Resources.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java index aa686383..2747981d 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java @@ -84,6 +84,15 @@ private Resources() { .setUrl("http://purl.obolibrary.org/obo/uo.owl") .setIriPrefix("http://purl.obolibrary.org/obo/UO_"); + private static final Resource.Builder HGNC_BUILDER = Resource.newBuilder() + .setId("hgnc") + .setName("HUGO Gene Nomenclature Committee") + .setNamespacePrefix("HGNC") + .setUrl("https://www.genenames.org") + .setIriPrefix("https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/"); + + public static Resource hgncVersion(String version) { return HGNC_BUILDER.setVersion(version).build(); } + public static Resource hpoVersion(String version) { return HPO_BUILDER.setVersion(version).build(); } From e62f6025a7a29bd82d7eb599cb87893faa824a23 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 20 Oct 2022 
13:03:45 -0400 Subject: [PATCH 13/79] Fix kilogram. Signed-off-by: Daniel Danis --- constants/Unit.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constants/Unit.tsv b/constants/Unit.tsv index 4a544c6f..8f8b0d57 100644 --- a/constants/Unit.tsv +++ b/constants/Unit.tsv @@ -3,7 +3,7 @@ UCUM:degree degree (plane angle) DEGREE degreeOfAngle UCUM:[diop] diopter DIOPTER diopter UCUM:g gram GRAM gram UCUM:g/kg gram per kilogram GRAM_PER_KG gramPerKilogram -UCUM:kg kiligram KILIGRAM kilogram +UCUM:kg kilogram KILOGRAM kilogram UCUM:L liter LITER liter UCUM:m meter METER meter UCUM:ug microgram MICROGRAM microgram From 358ae4bcdf57bd9f153935150ab3bb2eaadfaaed Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 20 Oct 2022 13:05:10 -0400 Subject: [PATCH 14/79] Split `HP:0032540` ontology label in the `SpatialPattern.tsv`. Signed-off-by: Daniel Danis --- constants/SpatialPattern.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constants/SpatialPattern.tsv b/constants/SpatialPattern.tsv index ad430e2b..687a256f 100644 --- a/constants/SpatialPattern.tsv +++ b/constants/SpatialPattern.tsv @@ -31,4 +31,4 @@ HP:0012840 Proximal PROXIMAL proximal HP:0033820 Apical APICAL apical HP:0030650 Focal FOCAL focal HP:0030651 Multifocal MULTIFOCAL multifocal -HP:0032540 Jointflexorsurfacelocalization JOINT_FLEXOR_SURFACE_LOCALIZATION jointFlexorSurfaceLocalization +HP:0032540 Joint flexor surface localization JOINT_FLEXOR_SURFACE_LOCALIZATION jointFlexorSurfaceLocalization From ccb97011e4dbeb75765e05c57bcb9f8c5a07b814 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 20 Oct 2022 13:07:01 -0400 Subject: [PATCH 15/79] Update function names in `MedicalActions.tsv`. 
Signed-off-by: Daniel Danis --- constants/MedicalActions.tsv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/constants/MedicalActions.tsv b/constants/MedicalActions.tsv index 0f1577e3..a8c483ef 100644 --- a/constants/MedicalActions.tsv +++ b/constants/MedicalActions.tsv @@ -1,10 +1,10 @@ ontology.id ontology.label variable.name function.name NCIT:C41331 Adverse Event ADVERSE_EVENT adverseEvent -NCIT:C64530 Four Times Daily FOUR_TIMES_DAILY fourtimesDaily +NCIT:C64530 Four Times Daily FOUR_TIMES_DAILY fourTimesDaily NCIT:C38222 Intraarterial Route of Administration INTRA_ARTERIAL intraArterialAdministration NCIT:C38276 Intravenous Route of Administration IV_ADMINISTRATION intravenousAdministration NCIT:C38288 Oral Route of Administration ORAL_ADMINISTRATION oralAdministration NCIT:C64576 Once ONCE once NCIT:C125004 Once Daily ONCE_DAILY onceDaily -NCIT:C64527 Three Times Daily THREE_TIMES_DAILY threetimesDaily +NCIT:C64527 Three Times Daily THREE_TIMES_DAILY threeTimesDaily NCIT:C64496 Twice Daily TWICE_DAILY twiceDaily From 1f47ca52d0a55177a466ac80c40bb27d5e7887e9 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 20 Oct 2022 13:09:28 -0400 Subject: [PATCH 16/79] Update tabs in `create_rtd.py`. Signed-off-by: Daniel Danis --- constants/create_rtd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constants/create_rtd.py b/constants/create_rtd.py index d5620605..712689b9 100644 --- a/constants/create_rtd.py +++ b/constants/create_rtd.py @@ -89,4 +89,4 @@ def create_csv_table(entry, fh): with open(RTD_PATH, "wt") as fh: fh.write(RTD_HEADER) for e in entries: - create_csv_table(e, fh=fh) + create_csv_table(e, fh=fh) From 0c63fd417839951824e64a99067db7f6e6fcb9f6 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 20 Oct 2022 13:10:51 -0400 Subject: [PATCH 17/79] Update constants docs and Java code. 
Signed-off-by: Daniel Danis --- docs/constants.rst | 8 ++++---- .../builder/constants/MedicalActions.java | 4 ++-- .../builder/constants/SpatialPattern.java | 2 +- .../phenopackettools/builder/constants/Unit.java | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/constants.rst b/docs/constants.rst index 31dbd431..2c3818eb 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -94,13 +94,13 @@ Terms from the `NCI Thesaurus `_ are :widths: 30, 200, 200 "NCIT:C41331", "Adverse Event", "adverseEvent()" - "NCIT:C64530", "Four Times Daily", "fourtimesDaily()" + "NCIT:C64530", "Four Times Daily", "fourTimesDaily()" "NCIT:C38222", "Intraarterial Route of Administration", "intraArterialAdministration()" "NCIT:C38276", "Intravenous Route of Administration", "intravenousAdministration()" "NCIT:C38288", "Oral Route of Administration", "oralAdministration()" "NCIT:C64576", "Once", "once()" "NCIT:C125004", "Once Daily", "onceDaily()" - "NCIT:C64527", "Three Times Daily", "threetimesDaily()" + "NCIT:C64527", "Three Times Daily", "threeTimesDaily()" "NCIT:C64496", "Twice Daily", "twiceDaily()" @@ -213,7 +213,7 @@ Modifier terms from the `HPO `_ are used to describe s "HP:0033820", "Apical", "apical()" "HP:0030650", "Focal", "focal()" "HP:0030651", "Multifocal", "multifocal()" - "HP:0032540", "Jointflexorsurfacelocalization", "jointFlexorSurfaceLocalization()" + "HP:0032540", "Joint flexor surface localization", "jointFlexorSurfaceLocalization()" Unit @@ -229,7 +229,7 @@ With some exceptions, terms from the `The Unified Code for Units of Measure Date: Thu, 20 Oct 2022 13:53:29 -0400 Subject: [PATCH 18/79] Fix method name. 
Signed-off-by: Daniel Danis --- .../examples/SevereStatinInducedAutoimmuneMyopathy.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java index 8947bdf3..36ebadcf 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java @@ -86,7 +86,7 @@ List previousTreatments() { var metformin = ontologyClass( "DrugCentral:1725", "metformin"); var fiveHundredMg = QuantityBuilder.builder(milligram(), 500).build(); var metforminAction = MedicalActionBuilder - .oralAdministration(metformin, fiveHundredMg, threetimesDaily(), interval) + .oralAdministration(metformin, fiveHundredMg, threeTimesDaily(), interval) .build(); return List.of(atorvastatinAction, aspirinAction, ramiprilAction, metforminAction); } From 61693df66eae85901ecb57310dbb49579be65d38 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 31 Oct 2022 17:06:21 -0400 Subject: [PATCH 19/79] Implement I/O, add core module, harmonize phenopacket-tools exception hierarchy. 
Signed-off-by: Daniel Danis --- phenopacket-tools-builder/pom.xml | 5 + .../src/main/java/module-info.java | 3 +- .../builder/FamilyBuilder.java | 8 +- .../builder/builders/Ages.java | 4 +- .../builder/builders/CopyNumberBuilder.java | 4 +- .../PhenotoolsRuntimeException.java | 6 - .../builder/builders/TimeElementsTest.java | 5 +- phenopacket-tools-cli/pom.xml | 22 +- .../src/main/java/module-info.java | 1 + .../command/BaseIOCommand.java | 143 +++----- .../command/ConvertCommand.java | 3 +- .../command/ExamplesCommand.java | 6 +- .../command/ValidateCommand.java | 4 +- phenopacket-tools-converter/pom.xml | 4 - .../src/main/java/module-info.java | 2 + .../converter/converters/V1ToV2Converter.java | 2 +- .../converters/V1ToV2ConverterImpl.java | 26 +- phenopacket-tools-core/pom.xml | 14 + .../src/main/java/module-info.java | 3 + .../core}/PhenopacketElement.java | 2 +- .../core}/PhenopacketFormat.java | 2 +- .../core/PhenopacketSchemaVersion.java | 18 + .../core/PhenopacketToolsException.java | 28 ++ .../PhenopacketToolsRuntimeException.java | 28 ++ .../phenopackettools/core/package-info.java | 4 + phenopacket-tools-io/pom.xml | 46 +++ .../src/main/java/module-info.java | 10 + .../io/PhenopacketParser.java | 79 +++++ .../io/PhenopacketParserFactory.java | 19 ++ .../io/PhenopacketParserFactoryException.java | 26 ++ .../io/PhenopacketParserFactoryImpl.java | 19 ++ .../io/base/BasePhenopacketParser.java | 51 +++ .../io/v1/V1PhenopacketParser.java | 40 +++ .../io/v2/V2PhenopacketParser.java | 39 +++ .../io/PhenopacketParserFactoryImplTest.java | 30 ++ .../phenopackettools/io/TestBase.java | 9 + .../io/v1/V1PhenopacketParserTest.java | 58 ++++ .../io/v2/V2PhenopacketParserTest.java | 59 ++++ .../phenopackettools/io/v1/README.md | 8 + .../phenopackettools/io/v1/cohort.json | 251 ++++++++++++++ .../phenopackettools/io/v1/cohort.pb | 80 +++++ .../phenopackettools/io/v1/family.json | 268 +++++++++++++++ .../phenopackettools/io/v1/family.pb | 83 +++++ 
.../phenopackettools/io/v1/phenopacket.json | 189 +++++++++++ .../phenopackettools/io/v1/phenopacket.pb | 60 ++++ .../phenopackettools/io/v2/cohort.json | 294 +++++++++++++++++ .../phenopackettools/io/v2/cohort.pb | Bin 0 -> 3099 bytes .../phenopackettools/io/v2/family.json | 311 ++++++++++++++++++ .../phenopackettools/io/v2/family.pb | Bin 0 -> 3130 bytes .../phenopackettools/io/v2/phenopacket.json | 220 +++++++++++++ .../phenopackettools/io/v2/phenopacket.pb | 71 ++++ phenopacket-tools-util/pom.xml | 8 + .../src/main/java/module-info.java | 2 + .../util/format/ElementSniffException.java | 25 ++ .../util/format/ElementSniffer.java | 48 +++ .../util/format/FormatSniffException.java | 2 +- .../util/format/FormatSniffer.java | 20 +- .../util/format/SniffException.java | 30 ++ .../phenopackettools/util/format/Util.java | 18 + .../util/format/FormatSnifferTest.java | 1 + phenopacket-tools-validator-core/pom.xml | 5 + .../src/main/java/module-info.java | 1 + .../validator/core/ConversionException.java | 6 +- .../except/PhenopacketValidatorException.java | 4 +- .../PhenopacketValidatorRuntimeException.java | 4 +- .../JsonSchemaValidationWorkflowRunner.java | 2 +- pom.xml | 26 +- 67 files changed, 2703 insertions(+), 166 deletions(-) delete mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/exceptions/PhenotoolsRuntimeException.java create mode 100644 phenopacket-tools-core/pom.xml create mode 100644 phenopacket-tools-core/src/main/java/module-info.java rename {phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format => phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core}/PhenopacketElement.java (95%) rename {phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format => phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core}/PhenopacketFormat.java (96%) create mode 100644 
phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketSchemaVersion.java create mode 100644 phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsException.java create mode 100644 phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsRuntimeException.java create mode 100644 phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java create mode 100644 phenopacket-tools-io/pom.xml create mode 100644 phenopacket-tools-io/src/main/java/module-info.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParser.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactory.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryException.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImpl.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java create mode 100644 phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImplTest.java create mode 100644 phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/TestBase.java create mode 100644 phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java create mode 100644 phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java create mode 100644 
phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/README.md create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.json create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.pb create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.json create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.pb create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.json create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.pb create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.json create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.pb create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.json create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.pb create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.json create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.pb create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffException.java create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/SniffException.java diff --git a/phenopacket-tools-builder/pom.xml b/phenopacket-tools-builder/pom.xml index 1d27d63d..39f876d0 100644 --- a/phenopacket-tools-builder/pom.xml +++ b/phenopacket-tools-builder/pom.xml 
@@ -13,6 +13,11 @@ phenopacket-tools-builder + + org.phenopackets.phenopackettools + phenopacket-tools-core + ${project.parent.version} + com.google.protobuf protobuf-java diff --git a/phenopacket-tools-builder/src/main/java/module-info.java b/phenopacket-tools-builder/src/main/java/module-info.java index 9ca32aca..cd4375ea 100644 --- a/phenopacket-tools-builder/src/main/java/module-info.java +++ b/phenopacket-tools-builder/src/main/java/module-info.java @@ -1,4 +1,6 @@ module org.phenopackets.phenopackettools.builder { + // No need to make it transitive since we only use runtime exceptions. + requires org.phenopackets.phenopackettools.core; requires transitive org.phenopackets.schema; // Required due to `TimestampBuilder`. //noinspection requires-transitive-automatic @@ -7,5 +9,4 @@ exports org.phenopackets.phenopackettools.builder; exports org.phenopackets.phenopackettools.builder.builders; exports org.phenopackets.phenopackettools.builder.constants; - exports org.phenopackets.phenopackettools.builder.exceptions; } \ No newline at end of file diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/FamilyBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/FamilyBuilder.java index 9f8a5654..22c1256f 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/FamilyBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/FamilyBuilder.java @@ -1,6 +1,6 @@ package org.phenopackets.phenopackettools.builder; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.schema.v2.Family; import org.phenopackets.schema.v2.Phenopacket; import org.phenopackets.schema.v2.core.File; @@ -57,11 +57,11 @@ public static FamilyBuilder create(String familyId) { public Family build() { if (! 
builder.hasMetaData()) { - throw new PhenotoolsRuntimeException("MetaData element missing from Family"); + throw new PhenopacketToolsRuntimeException("MetaData element missing from Family"); } else if (! builder.hasPedigree()) { - throw new PhenotoolsRuntimeException("Pedigree element missing from Family"); + throw new PhenopacketToolsRuntimeException("Pedigree element missing from Family"); } else if (! builder.hasProband()) { - throw new PhenotoolsRuntimeException("Proband Phenopacket element missing from Family"); + throw new PhenopacketToolsRuntimeException("Proband Phenopacket element missing from Family"); } return builder.build(); diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Ages.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Ages.java index 4711e498..f063b3df 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Ages.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Ages.java @@ -1,6 +1,6 @@ package org.phenopackets.phenopackettools.builder.builders; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.schema.v2.core.Age; import org.phenopackets.schema.v2.core.AgeRange; import org.phenopackets.schema.v2.core.GestationalAge; @@ -17,7 +17,7 @@ public static Age age(String iso8601duration) { try { Period.parse(iso8601duration); } catch (DateTimeParseException ex) { - throw new PhenotoolsRuntimeException("Invalid iso8601 age (period) string: \"" + iso8601duration + "\"."); + throw new PhenopacketToolsRuntimeException("Invalid iso8601 age (period) string: \"" + iso8601duration + "\"."); } return Age.newBuilder().setIso8601Duration(iso8601duration).build(); } diff --git 
a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java index 9c91fd27..8ca0ad0b 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/CopyNumberBuilder.java @@ -2,7 +2,7 @@ import org.ga4gh.vrs.v1.*; import org.ga4gh.vrs.v1.Number; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; public class CopyNumberBuilder { @@ -50,7 +50,7 @@ public CopyNumberBuilder threeCopies() { public CopyNumberBuilder nCopies(int n) { if (n < 0) { - throw new PhenotoolsRuntimeException("Negative copy numbers are not allowed"); + throw new PhenopacketToolsRuntimeException("Negative copy numbers are not allowed"); } builder.setNumber(Number.newBuilder().setValue(n)); return this; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/exceptions/PhenotoolsRuntimeException.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/exceptions/PhenotoolsRuntimeException.java deleted file mode 100644 index 7735686d..00000000 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/exceptions/PhenotoolsRuntimeException.java +++ /dev/null @@ -1,6 +0,0 @@ -package org.phenopackets.phenopackettools.builder.exceptions; - -public class PhenotoolsRuntimeException extends RuntimeException { - public PhenotoolsRuntimeException() { super();} - public PhenotoolsRuntimeException(String m) { super(m);} -} diff --git a/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/TimeElementsTest.java 
b/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/TimeElementsTest.java index f16bb59b..c91c4071 100644 --- a/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/TimeElementsTest.java +++ b/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/TimeElementsTest.java @@ -3,7 +3,7 @@ import com.google.protobuf.Timestamp; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.schema.v2.core.OntologyClass; import org.phenopackets.schema.v2.core.TimeElement; @@ -36,7 +36,7 @@ public void testValidIso8601Age() { public void testInvalidIso8601Age() { // B instead of Y -- invalid, should throw exception String iso8601 = "P31B3M2D"; - Assertions.assertThrows(PhenotoolsRuntimeException.class, () -> { + Assertions.assertThrows(PhenopacketToolsRuntimeException.class, () -> { TimeElement age = TimeElements.age(iso8601); }); } @@ -80,6 +80,7 @@ public void testTimestamp() { assertTrue(time.hasTimestamp()); assertEquals(timestamp, time.getTimestamp()); } + @Test public void testTimeInterval() { String time1 = "2020-03-17T00:00:00Z"; diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index 750e3107..8c23d0ce 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -31,6 +31,11 @@ phenopacket-tools-validator-jsonschema ${project.parent.version} + + org.phenopackets.phenopackettools + phenopacket-tools-io + ${project.parent.version} + info.picocli picocli @@ -39,18 +44,6 @@ ch.qos.logback logback-classic - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.dataformat - jackson-dataformat-yaml - - - com.google.protobuf - protobuf-java-util - org.monarchinitiative.phenol phenol-core @@ 
-59,6 +52,11 @@ org.monarchinitiative.phenol phenol-io + + + org.yaml + snakeyaml + org.apache.commons commons-csv diff --git a/phenopacket-tools-cli/src/main/java/module-info.java b/phenopacket-tools-cli/src/main/java/module-info.java index 04e7998b..9ba10f9a 100644 --- a/phenopacket-tools-cli/src/main/java/module-info.java +++ b/phenopacket-tools-cli/src/main/java/module-info.java @@ -1,5 +1,6 @@ module org.phenopackets.phenopackettools.cli { requires org.phenopackets.phenopackettools.util; + requires org.phenopackets.phenopackettools.io; requires org.phenopackets.phenopackettools.converter; requires org.phenopackets.phenopackettools.builder; requires org.phenopackets.phenopackettools.validator.jsonschema; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java index 070e9ddd..763c4d67 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java @@ -1,14 +1,14 @@ package org.phenopackets.phenopackettools.command; import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; -import org.phenopackets.phenopackettools.util.format.FormatSniffException; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.PhenopacketParserFactory; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.phenopackets.phenopackettools.util.format.ElementSniffer; import org.phenopackets.phenopackettools.util.format.FormatSniffer; -import org.phenopackets.phenopackettools.util.format.PhenopacketElement; -import 
org.phenopackets.phenopackettools.util.format.PhenopacketFormat; -import org.phenopackets.schema.v1.Cohort; -import org.phenopackets.schema.v1.Family; -import org.phenopackets.schema.v1.Phenopacket; +import org.phenopackets.phenopackettools.util.format.SniffException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine; @@ -27,6 +27,8 @@ public abstract class BaseIOCommand extends BaseCommand { private static final Logger LOGGER = LoggerFactory.getLogger(BaseIOCommand.class); + private final PhenopacketParserFactory parserFactory; + @CommandLine.ArgGroup(validate = false, heading = "Inputs:%n") public InputSection inputSection = new InputSection(); @@ -36,15 +38,21 @@ public static class InputSection { description = "Input phenopacket(s).%nLeave empty for STDIN") public List inputs = null; - // The format will be sniffed if it is uninitialized. + // The format will be sniffed if it is not provided. @CommandLine.Option(names = {"-f", "--format"}, - description = "Phenopacket format.%nChoose from: {${COMPLETION-CANDIDATES}}") + description = {"Phenopacket format.", + "Choose from: {${COMPLETION-CANDIDATES}}"}) public PhenopacketFormat format = null; - // TODO - is it too hard to implement element sniffing? @CommandLine.Option(names = {"-e", "--element"}, - description = "Top-level element.%nChoose from {${COMPLETION-CANDIDATES}}%nDefault: phenopacket") + description = {"Top-level element.", + "Choose from {${COMPLETION-CANDIDATES}}", + "Default: phenopacket"}) public PhenopacketElement element = null; + + } + protected BaseIOCommand() { + parserFactory = PhenopacketParserFactory.getInstance(); } /** @@ -56,22 +64,23 @@ public static class InputSection { * Note that the function does not return if reading fails. 
*/ protected List readMessagesOrExit(PhenopacketSchemaVersion schemaVersion) { + PhenopacketParser parser = parserFactory.forFormat(schemaVersion); if (inputSection.inputs == null) { - // Assuming a single input is coming from STDIN + // The user did not set `-i | --input` option, assuming a single input is coming from STDIN. InputStream is = System.in; try { setFormatAndElement(is); - return List.of(new MessageAndPath(parseMessage(schemaVersion, is), null)); - } catch (FormatSniffException e) { + Message message = parser.parse(inputSection.format, inputSection.element, is); + return List.of(new MessageAndPath(message, null)); + } catch (SniffException e) { System.err.println("Unable to detect input format from STDIN.\nConsider using the `--format` option."); - System.exit(1); } catch (IOException e) { System.err.println("Unable to read STDIN: " + e.getMessage() + "\nPlease check the input format."); - System.exit(1); } + System.exit(1); } else { // Assuming a one or more input are provided via `-i | --input`. - + // // Picocli should ensure that `input` is never an empty list. `input` is `null` if no `-i` was supplied. assert !inputSection.inputs.isEmpty(); @@ -79,9 +88,9 @@ protected List readMessagesOrExit(PhenopacketSchemaVersion schem for (Path input : inputSection.inputs) { try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) { setFormatAndElement(is); - Message message = parseMessage(schemaVersion, is); + Message message = parser.parse(inputSection.format, inputSection.element, is); messages.add(new MessageAndPath(message, input)); - } catch (FormatSniffException e) { + } catch (SniffException e) { System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath()); System.exit(1); } catch (IOException e) { @@ -91,90 +100,44 @@ protected List readMessagesOrExit(PhenopacketSchemaVersion schem } return messages; } - return null; // Cannot happen but to make the compiler happy... 
+ return null; // Cannot happen since System.exit() never returns, but to make the compiler happy... } - private void setFormatAndElement(InputStream is) throws IOException, FormatSniffException { - PhenopacketFormat sniffed = parseFormat(is); + /** + * Peek into the provided {@link InputStream} {@code is} to set {@link InputSection#format} + * and {@link InputSection#element} items + * + * @throws IOException if I/O error happens + * @throws SniffException if we cannot sniff the format + */ + private void setFormatAndElement(InputStream is) throws IOException, SniffException { + // Set format. + PhenopacketFormat fmt = FormatSniffer.sniff(is); if (inputSection.format == null) { - inputSection.format = sniffed; + LOGGER.info("Input format was not provided, making an educated guess.."); + LOGGER.info("The input looks like a {} file", fmt); + inputSection.format = fmt; } else { - if (!inputSection.format.equals(sniffed)) + if (!inputSection.format.equals(fmt)) // This can happen e.g. if processing multiple files at once but one turns out to be a different format. // We emit warning because this is likely not what the user intended and the code will likely explode // further downstream. - LOGGER.warn("Input format is set to {} but the current input looks like {}", inputSection.format, sniffed); + LOGGER.warn("Input format is set to {} but the current input looks like a {}", inputSection.format, fmt); } + // Set element. 
+ PhenopacketElement element = ElementSniffer.sniff(is); if (inputSection.element == null) { - LOGGER.info("Input element type (-e | --element) was not provided, assuming phenopacket.."); - inputSection.element = PhenopacketElement.PHENOPACKET; - } - } - - private Message parseMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { - return switch (inputSection.format) { - case PROTOBUF -> readProtobufMessage(schemaVersion, is); - case JSON -> readJsonMessage(schemaVersion, is); - // TODO - implement YAML parsing - case YAML -> throw new RuntimeException("YAML parser is not yet implemented"); - }; - } - - private Message readProtobufMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { - LOGGER.debug("Reading protobuf message"); - return switch (schemaVersion) { - case V1 -> switch (inputSection.element) { - case PHENOPACKET -> Phenopacket.parseFrom(is); - case FAMILY -> Family.parseFrom(is); - case COHORT -> Cohort.parseFrom(is); - }; - case V2 -> switch (inputSection.element) { - - case PHENOPACKET -> org.phenopackets.schema.v2.Phenopacket.parseFrom(is); - case FAMILY -> org.phenopackets.schema.v2.Family.parseFrom(is); - case COHORT -> org.phenopackets.schema.v2.Cohort.parseFrom(is); - }; - }; - } - - private Message readJsonMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { - LOGGER.debug("Reading JSON message"); - BufferedReader reader = new BufferedReader(new InputStreamReader(is)); - Message.Builder builder = prepareBuilder(schemaVersion, inputSection.element); - JsonFormat.parser().merge(reader, builder); - return builder.build(); - } - - private static Message.Builder prepareBuilder(PhenopacketSchemaVersion schemaVersion, PhenopacketElement element) { - return switch (schemaVersion) { - case V1 -> switch (element) { - case PHENOPACKET -> org.phenopackets.schema.v1.Phenopacket.newBuilder(); - case FAMILY -> org.phenopackets.schema.v1.Family.newBuilder(); - case 
COHORT -> org.phenopackets.schema.v1.Cohort.newBuilder(); - }; - case V2 -> switch (element) { - case PHENOPACKET -> org.phenopackets.schema.v2.Phenopacket.newBuilder(); - case FAMILY -> org.phenopackets.schema.v2.Family.newBuilder(); - case COHORT -> org.phenopackets.schema.v2.Cohort.newBuilder(); - }; - }; - } - - private PhenopacketFormat parseFormat(InputStream is) throws IOException, FormatSniffException { - if (inputSection.format == null) { - LOGGER.info("Input format was not provided, making an educated guess.."); - PhenopacketFormat fmt = FormatSniffer.sniff(is); - LOGGER.info("The input looks like a {} file", fmt); - return fmt; + LOGGER.info("Input element type (-e | --element) was not provided, making an educated guess.."); + LOGGER.info("The input looks like a {} ", element); + inputSection.element = element; + } else { + if (!inputSection.element.equals(element)) + // Let's go an extra mile and check for the user. + LOGGER.warn("Input element is set to {} but the current input looks like a {}", inputSection.element, element); } - return inputSection.format; } protected record MessageAndPath(Message message, Path path) {} - protected enum PhenopacketSchemaVersion { - V1, - V2; - } } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index f2221961..8bb481bc 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -3,7 +3,8 @@ import com.google.protobuf.Message; import com.google.protobuf.util.JsonFormat; import org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter; -import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import 
org.phenopackets.phenopackettools.core.PhenopacketFormat; import org.phenopackets.schema.v1.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java index 4c71a45a..69b3fc0d 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java @@ -8,7 +8,7 @@ import com.google.protobuf.Message; import com.google.protobuf.util.JsonFormat; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.phenopackettools.examples.*; import picocli.CommandLine; import picocli.CommandLine.Command; @@ -108,7 +108,7 @@ private static void outputJsonMessage(Message message, Path outDir, String fileN String json = JsonFormat.printer().print(message); writer.write(json); } catch (IOException e) { - throw new PhenotoolsRuntimeException(e.getMessage()); + throw new PhenopacketToolsRuntimeException(e.getMessage()); } } @@ -121,7 +121,7 @@ private static void outputYamlMessage(Message family, Path outDir, String yamlNa JsonNode node = JsonNodeFactory.instance.objectNode().set(messageName, jsonNodeTree); mapper.writeValue(writer, node); } catch (IOException e) { - throw new PhenotoolsRuntimeException(e.getMessage()); + throw new PhenopacketToolsRuntimeException(e.getMessage()); } } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java index 86a36979..4f5ad8b8 100644 --- 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java @@ -4,6 +4,8 @@ import com.google.protobuf.MessageOrBuilder; import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; import org.phenopackets.phenopackettools.validator.core.*; import org.phenopackets.phenopackettools.validator.core.metadata.MetaDataValidators; import org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators; @@ -129,7 +131,7 @@ private List prepareCustomSchemaUrls() { * for the current {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element}. * The app will crash and burn if e.g. {@link T} is {@link PhenopacketOrBuilder} * while {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element} - * is {@link org.phenopackets.phenopackettools.util.format.PhenopacketElement#FAMILY}. + * is {@link PhenopacketElement#FAMILY}. */ private List> configureSemanticValidators() { // Right now we only have one semantic validator, but we'll extend this in the future. 
diff --git a/phenopacket-tools-converter/pom.xml b/phenopacket-tools-converter/pom.xml index 843db16e..0dd20d5e 100644 --- a/phenopacket-tools-converter/pom.xml +++ b/phenopacket-tools-converter/pom.xml @@ -22,10 +22,6 @@ org.phenopackets phenopacket-schema - - com.google.protobuf - protobuf-java - org.phenopackets.phenopackettools diff --git a/phenopacket-tools-converter/src/main/java/module-info.java b/phenopacket-tools-converter/src/main/java/module-info.java index 1eed0a0f..a825bd10 100644 --- a/phenopacket-tools-converter/src/main/java/module-info.java +++ b/phenopacket-tools-converter/src/main/java/module-info.java @@ -1,6 +1,8 @@ module org.phenopackets.phenopackettools.converter { requires transitive org.phenopackets.schema; + requires org.phenopackets.phenopackettools.core; requires org.phenopackets.phenopackettools.builder; + requires org.slf4j; exports org.phenopackets.phenopackettools.converter.converters; } \ No newline at end of file diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2Converter.java b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2Converter.java index adafb39e..397d6e29 100644 --- a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2Converter.java +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2Converter.java @@ -12,7 +12,7 @@ * assuming all {@link org.phenopackets.schema.v1.core.Variant}s are * {@link org.phenopackets.schema.v2.core.GenomicInterpretation.InterpretationStatus#CAUSATIVE}. For this to work, * there must be exactly one {@link org.phenopackets.schema.v1.core.Disease} in the phenopacket, otherwise - * a {@link org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException} is thrown. 
+ * a {@link org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException} is thrown. */ public interface V1ToV2Converter { diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java index 7d9b9cd5..14934cfe 100644 --- a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java @@ -2,13 +2,15 @@ import org.ga4gh.vrsatile.v1.VariationDescriptor; import org.phenopackets.phenopackettools.builder.builders.*; -import org.phenopackets.phenopackettools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.schema.v1.core.Variant; import org.phenopackets.schema.v2.Cohort; import org.phenopackets.schema.v2.Family; import org.phenopackets.schema.v2.Phenopacket; import org.phenopackets.schema.v2.core.Interpretation; import org.phenopackets.schema.v2.core.OntologyClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.List; import java.util.function.Function; @@ -27,6 +29,8 @@ */ class V1ToV2ConverterImpl implements V1ToV2Converter { + private static final Logger LOGGER = LoggerFactory.getLogger(V1ToV2ConverterImpl.class); + private final boolean convertVariants; V1ToV2ConverterImpl(boolean convertVariants) { @@ -49,8 +53,11 @@ public Phenopacket convertPhenopacket(org.phenopackets.schema.v1.Phenopacket phe builder.addAllBiosamples(toBiosamples(phenopacket.getBiosamplesList())); } - if (convertVariants) - builder.addInterpretations(toV2Interpretation(phenopacket)); + if (convertVariants) { + Interpretation interpretation = toV2Interpretation(phenopacket); + if 
(!Interpretation.getDefaultInstance().equals(interpretation)) + builder.addInterpretations(interpretation); + } if (phenopacket.getDiseasesCount() > 0) { builder.addAllDiseases(toDiseases(phenopacket.getDiseasesList())); @@ -123,7 +130,16 @@ private static Interpretation toV2Interpretation(org.phenopackets.schema.v1.Phen so we will use the v1 phenopacket id for the interpretation id. */ if (v1.getDiseasesCount() != 1) { - throw new PhenotoolsRuntimeException("Can only convert variants if there is exactly one disease in v1 phenopacket!"); + if (v1.getVariantsCount() == 0) { + // If there are no variants then we do not care about having exactly one disease. + // We can still create a meaningful phenopacket, however, this may not be what the user intended, + // and we'll warn. + LOGGER.warn("Unable to convert disease and variant data since there are no variants in phenopacket '{}'", v1.getId()); + return Interpretation.getDefaultInstance(); + } else { + // Non-empty variant list but not a single disease, we throw. + throw new PhenopacketToolsRuntimeException("Can only convert variants if there is exactly one disease in v1 phenopacket!"); + } } var v1disease = v1.getDiseases(0); @@ -190,7 +206,7 @@ private static Function toVariationDescriptor() { .build(); } // cannot ever happen, but if it does... 
- case ALLELE_NOT_SET -> throw new PhenotoolsRuntimeException("Did not recognize variant type"); + case ALLELE_NOT_SET -> throw new PhenopacketToolsRuntimeException("Did not recognize variant type"); }; }; } diff --git a/phenopacket-tools-core/pom.xml b/phenopacket-tools-core/pom.xml new file mode 100644 index 00000000..2b2ac661 --- /dev/null +++ b/phenopacket-tools-core/pom.xml @@ -0,0 +1,14 @@ + + + 4.0.0 + + phenopacket-tools + org.phenopackets.phenopackettools + 0.4.7-SNAPSHOT + + + phenopacket-tools-core + + \ No newline at end of file diff --git a/phenopacket-tools-core/src/main/java/module-info.java b/phenopacket-tools-core/src/main/java/module-info.java new file mode 100644 index 00000000..5a3b93e6 --- /dev/null +++ b/phenopacket-tools-core/src/main/java/module-info.java @@ -0,0 +1,3 @@ +module org.phenopackets.phenopackettools.core { + exports org.phenopackets.phenopackettools.core; +} \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketElement.java similarity index 95% rename from phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java rename to phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketElement.java index 852c8ddc..27d5cf62 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketElement.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.util.format; +package org.phenopackets.phenopackettools.core; import java.util.Arrays; import java.util.stream.Collectors; diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java 
b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketFormat.java similarity index 96% rename from phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java rename to phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketFormat.java index d9020991..3db508a4 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketFormat.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.util.format; +package org.phenopackets.phenopackettools.core; import java.util.Arrays; import java.util.stream.Collectors; diff --git a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketSchemaVersion.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketSchemaVersion.java new file mode 100644 index 00000000..6fa23845 --- /dev/null +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketSchemaVersion.java @@ -0,0 +1,18 @@ +package org.phenopackets.phenopackettools.core; + +/** + * An enum with currently supported Phenopacket schema versions. + */ +public enum PhenopacketSchemaVersion { + + /** + * The version 1 of the GA4GH Phenopacket schema released in 2019 to elicit community response. + * The {@code V1} has been deprecated in favor of {@link #V2}. + */ + V1, + /** + * The version 2 of the GA4GH Phenopacket schema. This is the current version. 
+ */ + V2 + +} diff --git a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsException.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsException.java new file mode 100644 index 00000000..8f1f664a --- /dev/null +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsException.java @@ -0,0 +1,28 @@ +package org.phenopackets.phenopackettools.core; + +/** + * Base checked exception thrown by phenopacket-tools. + */ +public class PhenopacketToolsException extends Exception { + + public PhenopacketToolsException() { + super(); + } + + public PhenopacketToolsException(String message) { + super(message); + } + + public PhenopacketToolsException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketToolsException(Throwable cause) { + super(cause); + } + + protected PhenopacketToolsException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsRuntimeException.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsRuntimeException.java new file mode 100644 index 00000000..b67798c5 --- /dev/null +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/PhenopacketToolsRuntimeException.java @@ -0,0 +1,28 @@ +package org.phenopackets.phenopackettools.core; + +/** + * Base unchecked exception thrown by phenopacket-tools. 
+ */ +public class PhenopacketToolsRuntimeException extends RuntimeException { + + public PhenopacketToolsRuntimeException() { + super(); + } + + public PhenopacketToolsRuntimeException(String message) { + super(message); + } + + public PhenopacketToolsRuntimeException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketToolsRuntimeException(Throwable cause) { + super(cause); + } + + protected PhenopacketToolsRuntimeException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java new file mode 100644 index 00000000..d88cf715 --- /dev/null +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with constants and types used across the entire application, including the base exception classes. 
+ */ +package org.phenopackets.phenopackettools.core; \ No newline at end of file diff --git a/phenopacket-tools-io/pom.xml b/phenopacket-tools-io/pom.xml new file mode 100644 index 00000000..cd4817f9 --- /dev/null +++ b/phenopacket-tools-io/pom.xml @@ -0,0 +1,46 @@ + + + 4.0.0 + + phenopacket-tools + org.phenopackets.phenopackettools + 0.4.7-SNAPSHOT + + + phenopacket-tools-io + + + + org.phenopackets.phenopackettools + phenopacket-tools-util + ${project.parent.version} + + + org.phenopackets + phenopacket-schema + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + + + org.yaml + snakeyaml + + + com.google.protobuf + protobuf-java + + + com.google.protobuf + protobuf-java-util + + + + \ No newline at end of file diff --git a/phenopacket-tools-io/src/main/java/module-info.java b/phenopacket-tools-io/src/main/java/module-info.java new file mode 100644 index 00000000..ee92b566 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/module-info.java @@ -0,0 +1,10 @@ +module org.phenopackets.phenopackettools.io { + requires org.phenopackets.phenopackettools.util; + + requires org.phenopackets.schema; + requires com.google.protobuf; + requires com.google.protobuf.util; + requires org.slf4j; + + exports org.phenopackets.phenopackettools.io; +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParser.java new file mode 100644 index 00000000..98a4452e --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParser.java @@ -0,0 +1,79 @@ +package org.phenopackets.phenopackettools.io; + +import com.google.protobuf.Message; +import org.phenopackets.phenopackettools.util.format.FormatSniffer; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import 
org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.util.format.SniffException; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +public interface PhenopacketParser { + + Message parse(PhenopacketFormat format, PhenopacketElement element, InputStream is) throws IOException; + + default Message parse(PhenopacketFormat format, PhenopacketElement element, Path path) throws IOException { + try (InputStream is = openInputStream(path)) { + return parse(format, element, is); + } + } + + /* ******************************************* CONVENIENCE METHODS ******************************************* */ + + // We need to detect the element. + + default Message parse(PhenopacketFormat format, InputStream is) throws IOException { + PhenopacketElement element = sniffElement(is); + return parse(format, element, is); + } + + default Message parse(PhenopacketFormat format, Path path) throws IOException { + try (InputStream is = openInputStream(path)) { + return parse(format, is); + } + } + + // We need to detect the format. + + default Message parse(PhenopacketElement element, InputStream is) throws IOException, SniffException { + PhenopacketFormat format = sniffFormat(is); + return parse(format, element, is); + } + + default Message parse(PhenopacketElement element, Path path) throws IOException, SniffException { + try (InputStream is = openInputStream(path)) { + return parse(element, is); + } + } + + // We need to detect both the format and the element. 
+ + default Message parse(InputStream is) throws IOException, SniffException { + PhenopacketFormat format = sniffFormat(is); + return parse(format, is); + } + + default Message parse(Path path) throws IOException, SniffException { + try (InputStream is = openInputStream(path)) { + return parse(is); + } + } + + /* ******************************************* UTILITY METHODS ******************************************* */ + + private static PhenopacketElement sniffElement(InputStream is) { + return PhenopacketElement.PHENOPACKET; // TODO - implement + } + + private static PhenopacketFormat sniffFormat(InputStream is) throws SniffException, IOException { + return FormatSniffer.sniff(is); + } + + private static BufferedInputStream openInputStream(Path path) throws IOException { + return new BufferedInputStream(Files.newInputStream(path)); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactory.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactory.java new file mode 100644 index 00000000..7c29ba7c --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactory.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; + +public interface PhenopacketParserFactory { + + static PhenopacketParserFactory getInstance() { + return PhenopacketParserFactoryImpl.INSTANCE; + } + + /** + * Get a {@link PhenopacketParser} to parse a phenopacket with the given {@link PhenopacketSchemaVersion}. 
+ * + * @throws PhenopacketParserFactoryException if a {@link PhenopacketParser} for the given {@code version} + * is not available + */ + PhenopacketParser forFormat(PhenopacketSchemaVersion version) throws PhenopacketParserFactoryException; + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryException.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryException.java new file mode 100644 index 00000000..d030dd23 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryException.java @@ -0,0 +1,26 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; + +public class PhenopacketParserFactoryException extends PhenopacketToolsRuntimeException { + + public PhenopacketParserFactoryException() { + super(); + } + + public PhenopacketParserFactoryException(String message) { + super(message); + } + + public PhenopacketParserFactoryException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketParserFactoryException(Throwable cause) { + super(cause); + } + + protected PhenopacketParserFactoryException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImpl.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImpl.java new file mode 100644 index 00000000..e24654a1 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImpl.java @@ -0,0 +1,19 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import 
org.phenopackets.phenopackettools.io.v1.V1PhenopacketParser; +import org.phenopackets.phenopackettools.io.v2.V2PhenopacketParser; + +class PhenopacketParserFactoryImpl implements PhenopacketParserFactory { + + static final PhenopacketParserFactoryImpl INSTANCE = new PhenopacketParserFactoryImpl(); + + @Override + public PhenopacketParser forFormat(PhenopacketSchemaVersion version) throws PhenopacketParserFactoryException { + return switch (version) { + case V1 -> V1PhenopacketParser.INSTANCE; + case V2 -> V2PhenopacketParser.INSTANCE; + }; + } + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java new file mode 100644 index 00000000..b435d3d1 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java @@ -0,0 +1,51 @@ +package org.phenopackets.phenopackettools.io.base; + +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; + +public abstract class BasePhenopacketParser implements PhenopacketParser { + + private static final Logger LOGGER = LoggerFactory.getLogger(BasePhenopacketParser.class); + + @Override + public Message parse(PhenopacketFormat format, PhenopacketElement element, InputStream is) throws IOException { + return switch (format) { + case PROTOBUF -> { + LOGGER.debug("Reading protobuf message"); + yield readProtobufMessage(element, is); + } + case JSON -> { + LOGGER.debug("Reading JSON message"); + yield 
readJsonMessage(element, is); + } + case YAML -> { + LOGGER.debug("Reading YAML message"); + yield readYamlMessage(element, is); + } + }; + } + + protected abstract Message readProtobufMessage(PhenopacketElement element, InputStream is) throws IOException; + + private Message readJsonMessage(PhenopacketElement element, InputStream is) throws IOException { + // Not closing the BufferedReader as the InputStream should be closed. + BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + Message.Builder builder = prepareBuilder(element); + JsonFormat.parser().merge(reader, builder); + return builder.build(); + } + + protected abstract Message.Builder prepareBuilder(PhenopacketElement element); + + protected abstract Message readYamlMessage(PhenopacketElement element, InputStream is) throws IOException; +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java new file mode 100644 index 00000000..44647884 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java @@ -0,0 +1,40 @@ +package org.phenopackets.phenopackettools.io.v1; + +import com.google.protobuf.Message; +import org.phenopackets.phenopackettools.io.base.BasePhenopacketParser; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.schema.v1.Cohort; +import org.phenopackets.schema.v1.Family; +import org.phenopackets.schema.v1.Phenopacket; + +import java.io.IOException; +import java.io.InputStream; + +public class V1PhenopacketParser extends BasePhenopacketParser { + + public static final V1PhenopacketParser INSTANCE = new V1PhenopacketParser(); + + @Override + protected Message readProtobufMessage(PhenopacketElement element, InputStream is) throws IOException { + return switch (element) { + case PHENOPACKET -> Phenopacket.parseFrom(is); + 
case FAMILY -> Family.parseFrom(is); + case COHORT -> Cohort.parseFrom(is); + }; + } + + @Override + protected Message readYamlMessage(PhenopacketElement element, InputStream is) throws IOException { + throw new RuntimeException("Not yet implemented"); // TODO - implement + } + + @Override + protected Message.Builder prepareBuilder(PhenopacketElement element) { + return switch (element) { + case PHENOPACKET -> Phenopacket.newBuilder(); + case FAMILY -> Family.newBuilder(); + case COHORT -> Cohort.newBuilder(); + }; + } + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java new file mode 100644 index 00000000..8221523f --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java @@ -0,0 +1,39 @@ +package org.phenopackets.phenopackettools.io.v2; + +import com.google.protobuf.Message; +import org.phenopackets.phenopackettools.io.base.BasePhenopacketParser; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.schema.v2.Cohort; +import org.phenopackets.schema.v2.Family; +import org.phenopackets.schema.v2.Phenopacket; + +import java.io.IOException; +import java.io.InputStream; + +public class V2PhenopacketParser extends BasePhenopacketParser { + + public static final V2PhenopacketParser INSTANCE = new V2PhenopacketParser(); + + @Override + protected Message readProtobufMessage(PhenopacketElement element, InputStream is) throws IOException { + return switch (element) { + case PHENOPACKET -> Phenopacket.parseFrom(is); + case FAMILY -> Family.parseFrom(is); + case COHORT -> Cohort.parseFrom(is); + }; + } + + @Override + protected Message readYamlMessage(PhenopacketElement element, InputStream is) throws IOException { + throw new RuntimeException("Not yet implemented"); // TODO - implement + } + + @Override + 
protected Message.Builder prepareBuilder(PhenopacketElement element) { + return switch (element) { + case PHENOPACKET -> Phenopacket.newBuilder(); + case FAMILY -> Family.newBuilder(); + case COHORT -> Cohort.newBuilder(); + }; + } +} diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImplTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImplTest.java new file mode 100644 index 00000000..f7415fa1 --- /dev/null +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/PhenopacketParserFactoryImplTest.java @@ -0,0 +1,30 @@ +package org.phenopackets.phenopackettools.io; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; + +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class PhenopacketParserFactoryImplTest { + + private PhenopacketParserFactoryImpl parserFactory; + + @BeforeEach + public void setUp() { + parserFactory = PhenopacketParserFactoryImpl.INSTANCE; + } + + @ParameterizedTest + @CsvSource({ + "V1", + "V2" + }) + public void weHaveAParserForAllSchemaVersions(PhenopacketSchemaVersion version) { + PhenopacketParser parser = parserFactory.forFormat(version); + assertThat(parser, is(notNullValue())); + } + +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/TestBase.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/TestBase.java new file mode 100644 index 00000000..f123327d --- /dev/null +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/TestBase.java @@ -0,0 +1,9 @@ +package org.phenopackets.phenopackettools.io; + +import java.nio.file.Path; + +public class TestBase { + + public static final Path 
BASE_DIR = Path.of("src/test/resources/org/phenopackets/phenopackettools/io"); + +} diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java new file mode 100644 index 00000000..749a843c --- /dev/null +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java @@ -0,0 +1,58 @@ +package org.phenopackets.phenopackettools.io.v1; + +import com.google.protobuf.Message; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.TestBase; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.schema.v1.Cohort; +import org.phenopackets.schema.v1.Family; +import org.phenopackets.schema.v1.Phenopacket; + +import java.nio.file.Path; + +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class V1PhenopacketParserTest { + + private static final Path BASE = TestBase.BASE_DIR.resolve("v1"); + private PhenopacketParser parser; + + @BeforeEach + public void setUp() { + parser = V1PhenopacketParser.INSTANCE; + } + + @ParameterizedTest + @CsvSource({ + "PROTOBUF, PHENOPACKET, phenopacket.pb", + "PROTOBUF, FAMILY, family.pb", + "PROTOBUF, COHORT, cohort.pb", + " JSON, PHENOPACKET, phenopacket.json", + " JSON, FAMILY, family.json", + " JSON, COHORT, cohort.json", + // TODO - finalize once we settle down on the YAML format +// " YAML, PHENOPACKET, phenopacket.yaml", +// " YAML, FAMILY, family.yaml", +// " YAML, COHORT, cohort.yaml", + }) + public void weGetExpectedClassForGivenFormatAndElement(PhenopacketFormat format, + PhenopacketElement 
element, + String fileName) throws Exception { + Message message = parser.parse(format, element, BASE.resolve(fileName)); + + assertThat(message, is(instanceOf(getClassForPhenopacketElement(element)))); + } + + private static Class getClassForPhenopacketElement(PhenopacketElement element) { + return switch (element) { + case PHENOPACKET -> Phenopacket.class; + case FAMILY -> Family.class; + case COHORT -> Cohort.class; + }; + } +} diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java new file mode 100644 index 00000000..c0eddb78 --- /dev/null +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java @@ -0,0 +1,59 @@ +package org.phenopackets.phenopackettools.io.v2; + +import com.google.protobuf.Message; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.TestBase; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.schema.v2.Cohort; +import org.phenopackets.schema.v2.Family; +import org.phenopackets.schema.v2.Phenopacket; + +import java.nio.file.Path; + +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class V2PhenopacketParserTest { + + private static final Path BASE = TestBase.BASE_DIR.resolve("v2"); + + private PhenopacketParser parser; + + @BeforeEach + public void setUp() { + parser = V2PhenopacketParser.INSTANCE; + } + + @ParameterizedTest + @CsvSource({ + "PROTOBUF, PHENOPACKET, phenopacket.pb", + "PROTOBUF, FAMILY, family.pb", + "PROTOBUF, COHORT, cohort.pb", + " JSON, PHENOPACKET, 
phenopacket.json", + " JSON, FAMILY, family.json", + " JSON, COHORT, cohort.json", + // TODO - finalize once we settle down on the YAML format +// " YAML, PHENOPACKET, phenopacket.yaml", +// " YAML, FAMILY, family.yaml", +// " YAML, COHORT, cohort.yaml", + }) + public void weGetExpectedClassForGivenFormatAndElement(PhenopacketFormat format, + PhenopacketElement element, + String fileName) throws Exception { + Message message = parser.parse(format, element, BASE.resolve(fileName)); + + assertThat(message, is(instanceOf(getClassForPhenopacketElement(element)))); + } + + private static Class getClassForPhenopacketElement(PhenopacketElement element) { + return switch (element) { + case PHENOPACKET -> Phenopacket.class; + case FAMILY -> Family.class; + case COHORT -> Cohort.class; + }; + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/README.md b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/README.md new file mode 100644 index 00000000..85c3c57c --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/README.md @@ -0,0 +1,8 @@ +# README + +The files in this folder correspond to comprehensive, albeit medically invalid, phenopacket elements: +- phenopacket +- family, or +- cohort. + +The content corresponds to the output of `TestData.V1.comprehensive*()` as of Oct 27th, 2022. 
diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.json new file mode 100644 index 00000000..17ab00cb --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.json @@ -0,0 +1,251 @@ +{ + "id": "comprehensive-cohort-id", + "description": "A description of the example cohort.", + "members": [{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + "hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": 
"P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } + }, { + "subject": { + "id": "MOTHER", + "sex": "FEMALE" + } + }, { + "subject": { + "id": "FATHER", + "sex": "MALE" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/FAM000001", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.pb new file mode 100644 index 00000000..5c57f5ee --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.pb @@ -0,0 +1,80 @@ + +comprehensive-cohort-id$A description of the example cohort. +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandd" +P14Y08B +NCBITaxon:9606 homo sapiens& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report./ + +HP:0031910!Abnormal cranial nerve physiologyR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset" + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJ + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.  + +MOTHER0  + +FATHER0" +file://data/genomes/FAM000001"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001C* +MOTHERP000001M* +FATHERP000001F* + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.json new file mode 100644 index 00000000..b7ad2d73 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.json @@ -0,0 +1,268 @@ +{ + "id": "comprehensive-family-id", + "proband": { + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + "hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": 
"P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } + }, + "relatives": [{ + "subject": { + "id": "MOTHER", + "sex": "FEMALE" + } + }, { + "subject": { + "id": "FATHER", + "sex": "MALE" + } + }], + "pedigree": { + "persons": [{ + "individualId": "14 year-old boy", + "paternalId": "FATHER", + "maternalId": "MOTHER", + "sex": "MALE", + "affectedStatus": "AFFECTED" + }, { + "individualId": "MOTHER", + "sex": "FEMALE", + "affectedStatus": "UNAFFECTED" + }, { + "individualId": "FATHER", + "sex": "MALE", + "affectedStatus": "UNAFFECTED" + }] + }, + "htsFiles": [{ + "uri": "file://data/genomes/FAM000001", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.pb new file mode 100644 index 00000000..fb131057 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.pb @@ -0,0 +1,83 @@ + +comprehensive-family-id +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandd" +P14Y08B +NCBITaxon:9606 homo sapiens& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report./ + +HP:0031910!Abnormal cranial nerve physiologyR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset" + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJ + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.  + +MOTHER0  + +FATHER0"C +%14 year-old boyFATHER"MOTHER(0 + MOTHER(0 + FATHER(0* +file://data/genomes/FAM000001"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001C* +MOTHERP000001M* +FATHERP000001F2 + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.json new file mode 100644 index 00000000..e6848a9c --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.json @@ -0,0 +1,189 @@ +{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + "hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": 
"P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.pb new file mode 100644 index 00000000..8fe66cc2 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.pb @@ -0,0 +1,60 @@ + +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandd" +P14Y08B +NCBITaxon:9606 homo sapiens& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report./ + +HP:0031910!Abnormal cranial nerve physiologyR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset" + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJ + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.json b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.json new file mode 100644 index 00000000..e59170ac --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.json @@ -0,0 +1,294 @@ +{ + "id": "comprehensive-cohort-id", + "description": "A description of the example cohort.", + "members": [{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P14Y" + } + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + 
"onset": { + "ontologyClass": { + "id": "HP:0011461", + "label": "Fetal onset" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "excluded": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "onset": { + "age": { + "iso8601duration": "P14Y" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "timeOfCollection": { + "age": { + "iso8601duration": "P14Y" + } + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }], + "materialSample": { + "id": "EFO:0009655", + "label": "abnormal sample" + } + }], + "interpretations": [{ + "id": "comprehensive-phenopacket-id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "14 year-old boy", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "variationDescriptor": { + "expressions": [{ + "syntax": "hgvs", + "value": "NM_001848.2:c.877G\u003eA" + }], + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }], + "files": [{ + "uri": "file://data/genomes/P000001C", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF 
output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } + }, { + "subject": { + "id": "MOTHER", + "dateOfBirth": "1970-01-01T00:00:00Z", + "timeAtLastEncounter": { + }, + "sex": "FEMALE", + "taxonomy": { + } + } + }, { + "subject": { + "id": "FATHER", + "dateOfBirth": "1970-01-01T00:00:00Z", + "timeAtLastEncounter": { + }, + "sex": "MALE", + "taxonomy": { + } + } + }], + "files": [{ + "uri": "file://data/genomes/FAM000001", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.pb new file mode 100644 index 0000000000000000000000000000000000000000..0679cfa09cdfea17ce246282ad8dae72315c188f GIT binary patch literal 3099 zcmeHJ-EJF26pmAqhDn<^BT&mWZKq2qlBKNoC$=|MRpB^xe$=&%8$j&^(d_QnyL5MF z*`0N|z2q&*4Tvjv1+GBig16uWxIy9pn6cyZ4{EL|RqBo7%$#$+Ip3V`8-q&@4daA( zBuf1;*>tE!6SnEQ@MaUaBy|!$W}QV4hWjYDbf$?VB`4;F)u}9>^vls zGGBr?G>8}F31;Bp4p{lA@t(x%iE^!iqQ{ti_VK_h` z9g~nF=~tu=P9rsKtERz~&SalPDPcQ56~HH8rQPa@LaJ&QP{SGHWT2E`HYtG8ERH!l zyt17mmSBfqn{$znAxTK&5Jd)2qT9##P(bTPDj#VBuC+YA z>{05X$yk?vUIZ6xpQaPl3R%CXMj2UI>hLM%QE{3Joxh;h@(r;1*-m@V>k4TYTPlZ1 zANmtZ7*Z~^B%G(x!P#{@xDHmjt%IXR%P{NJIxKa`h%1B7YR8X+GHl+^X+W)Bhif|| z@<$Qs5*i0M4ZpfI%hye{UW1o*{Zxn@?IqYH=+g`b{?I4MHw82cteN#K6Rz*K2YRJT zGSsK3FA%>iT;qc8Oa#AOt5)F}o~dX`m;R*iYI7@{lTHA~=~)L>LsPYys>1btr+v`r zv}4U2xbg8uSRNDJCz}|c^pVF{%=?V9Bv7bL1K&o@15ilAfpHdu-?J0vb#>Swk^+;UMoLDTG%_j_`3qC}4ED^Ub7rErnkw;RTC0Q!XJGID}u{Axlu9k~JwsTJ{f{yifbVXIv#%p>nAuNF`00+aCs;XMOZo4X4bdF-jO_vjiwfqlm_gXSOoLVr&y^`2@veOkxt+1QKv@*gM>7 z8LFw8hNeqL?ZLgqj)sDiC-G>Ae1aV>oFGQgE@7@u0u*GtUChnUiN{=?%uZvTjUkr= zn5D6YTL>eY7sU4jydwQpfVI;~3~gIe;f)xYI#}tq z506@H!)(@@u+k?J&I~@QT`v^Ouy{eI4vl6LuI-Z0n}n!OXyoH0`21ELubXPK0Wa-& zi6A@LkFi70M=AEbu}9*s3MhAMn9VH{t{-$pdaX}VG^B|q5Wg;5vs;NYSOXrxH1!>13;J#5JtD7q26?4|(@ zx$yBqSQZgpCz}bNbkk)l;&n!8>?_owzGuZa&J-F?YJ6F9BZZ#$GSK-&Y^uhlYF3K} zLtS1Fb}&4aAA^#p0j}@osDm8!{RQD7ZjP&M3siw#Es1;A-&+tWoQ~z&pd6)EKpa>e zrdE$Q)slFE=FthS_vGY632n<$LO)PK&s8RL%9|iUYVs76c*hyM$SH@8T$124P7`U- zsYOiQt@){SwOSIiU3hBYbgaLjE2^TlUebFBVFi?XgCi~h(vrNS7Va)dGh27(%vQX+ zB)7rM=k!r3&zUpnb$5DmQoWE<5auU6<5^GG Z<^X55?#tkw{+fILB^Ix__doxu{snF*zsdjr literal 0 HcmV?d00001 diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.json 
b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.json new file mode 100644 index 00000000..89d29db4 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.json @@ -0,0 +1,220 @@ +{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P14Y" + } + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011461", + "label": "Fetal onset" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "excluded": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "onset": { + "age": { + "iso8601duration": "P14Y" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "timeOfCollection": { + "age": { + "iso8601duration": "P14Y" + } + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }], + "materialSample": { + "id": "EFO:0009655", + "label": "abnormal sample" + } + }], + "interpretations": [{ + "id": "comprehensive-phenopacket-id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "14 year-old boy", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "variationDescriptor": { + "expressions": [{ + "syntax": "hgvs", + "value": "NM_001848.2:c.877G\u003eA" + }], + "allelicState": { + "id": "GENO:0000135", + 
"label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }], + "files": [{ + "uri": "file://data/genomes/P000001C", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.pb b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.pb new file mode 100644 index 00000000..3acd16a2 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.pb @@ -0,0 +1,71 @@ + +comprehensive-phenopacket-id^ +14 year-old boyboypatientprobandd" + +P14Y08J +NCBITaxon:9606 homo sapiens& + +HP:0001558Decreased fetal movement2 + +HP:0011461 Fetal onsetB +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report./ + +HP:0031910!Abnormal cranial nerve physiologyB +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 + +P14YB +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.P + +HP:0001270 Motor delay" + +HP:0012825Mild2 + +HP:0011463Childhood onset* + biosample-id14 year-old boy" Muscle biopsy of 14 year-old boy*! 
+UBERON:0003403skin of forearmJ +NCBITaxon:9606 homo sapiensR + +P14YZ + NCIT:C38757Negative Findingb + +NCIT:C3677Benign Neoplasmj& + NCIT:C28076Disease Grade Qualifier + NCIT:C68748HER2/Neu Positive + EFO:0009655abnormal sample2 +comprehensive-phenopacket-idw + + OMIM:101600PFEIFFER SYNDROMES +14 year-old boy"><2 +hgvsNM_001848.2:c.877G>Ar + GENO:0000135 heterozygous:D + + OMIM:101600PFEIFFER SYNDROME  + +HP:0003577Congenital onsetR +file://data/genomes/P000001C +14 year-old boyP000001C +genomeAssembly +GRCh38.p13 + +fileFormatvcf1 + description"Whole genome sequencing VCF outputZ + +Peter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_22.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-util/pom.xml b/phenopacket-tools-util/pom.xml index b7ed424b..e595d0f1 100644 --- a/phenopacket-tools-util/pom.xml +++ b/phenopacket-tools-util/pom.xml @@ -11,4 +11,12 @@ phenopacket-tools-util + + + org.phenopackets.phenopackettools + phenopacket-tools-core + ${project.parent.version} + + + \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/module-info.java b/phenopacket-tools-util/src/main/java/module-info.java index 9d20effb..64b46143 100644 --- a/phenopacket-tools-util/src/main/java/module-info.java +++ b/phenopacket-tools-util/src/main/java/module-info.java @@ -1,3 +1,5 @@ module org.phenopackets.phenopackettools.util { + requires transitive org.phenopackets.phenopackettools.core; + exports org.phenopackets.phenopackettools.util.format; } \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffException.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffException.java new file mode 100644 index 00000000..b90545f0 --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffException.java @@ -0,0 +1,25 @@ +package org.phenopackets.phenopackettools.util.format; + +public class ElementSniffException extends SniffException { + + public ElementSniffException() { + super(); + } + + public ElementSniffException(String message) { + super(message); + } + + public ElementSniffException(String message, Throwable cause) { + super(message, cause); + } + + public ElementSniffException(Throwable cause) { + super(cause); + } + + protected ElementSniffException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git 
a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java new file mode 100644 index 00000000..56d756fd --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java @@ -0,0 +1,48 @@ +package org.phenopackets.phenopackettools.util.format; + +import org.phenopackets.phenopackettools.core.PhenopacketElement; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Make an educated guess regarding which top-level element of Phenopacket schema is represented in the provided + * {@code byte[]} or {@link InputStream}. + */ +public class ElementSniffer { + + /** + * The number of bytes used for element sniffing. + */ + static final int BUFFER_SIZE = 32; + + private ElementSniffer() { + } + + /** + * Make an educated guess of {@link PhenopacketElement} present in given {@code input}. + * + * @param input an {@link InputStream} that supports {@link InputStream#mark(int)}. + * @return the sniffed {@link PhenopacketElement}. + * @throws IOException in case an error occurs while reading the {@code input}. + * @throws SniffException if there are not enough bytes available in the {@code input} of if the {@code input} does not + * support {@link InputStream#mark(int)}. + */ + public static PhenopacketElement sniff(InputStream input) throws IOException, SniffException { + return sniff(Util.getFirstBytesAndReset(input, BUFFER_SIZE)); + } + + /** + * Make an educated guess of {@link PhenopacketElement} based on given {@code payload}. + * + * @param payload buffer with at least the first {@link #BUFFER_SIZE} bytes of the input. + * @return the sniffed {@link PhenopacketElement}. + * @throws ElementSniffException if {@code payload} contains less than {@link #BUFFER_SIZE} bytes. 
+ */ + public static PhenopacketElement sniff(byte[] payload) throws ElementSniffException { + if (payload.length < BUFFER_SIZE) + throw new ElementSniffException("Need at least %d bytes to sniff but got %d".formatted(BUFFER_SIZE, payload.length)); + // TODO - implement + return PhenopacketElement.PHENOPACKET; + } +} diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java index 3cc3a382..c33b276d 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java @@ -3,7 +3,7 @@ /** * An exception thrown when sniffing of the top-level element of Phenopacket schema cannot be performed. */ -public class FormatSniffException extends Exception { +public class FormatSniffException extends ElementSniffException { public FormatSniffException() { super(); diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java index 0d4984d6..9d6b7376 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java @@ -1,5 +1,7 @@ package org.phenopackets.phenopackettools.util.format; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; + import java.io.IOException; import java.io.InputStream; @@ -48,21 +50,7 @@ public static PhenopacketFormat sniff(byte[] payload) throws FormatSniffExceptio * @throws FormatSniffException if there are not enough bytes available in the {@code input} of if the {@code input} does not * 
support {@link InputStream#mark(int)}. */ - public static PhenopacketFormat sniff(InputStream input) throws IOException, FormatSniffException { - if (input.markSupported()) { - byte[] buffer = new byte[BUFFER_SIZE]; - input.mark(BUFFER_SIZE); - int read = input.read(buffer); - if (read < BUFFER_SIZE) { - // We explode because there are not enough bytes available for format sniffing. - String message = read < 0 - ? "The stream must not be at the end" - : "Need at least %d bytes to sniff the format but only %d was available".formatted(BUFFER_SIZE, read); - throw new FormatSniffException(message); - } - input.reset(); - return sniff(buffer); - } else - throw new FormatSniffException("The provided InputStream does not support `mark()`"); + public static PhenopacketFormat sniff(InputStream input) throws IOException, SniffException { + return sniff(Util.getFirstBytesAndReset(input, BUFFER_SIZE)); } } diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/SniffException.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/SniffException.java new file mode 100644 index 00000000..fae81f92 --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/SniffException.java @@ -0,0 +1,30 @@ +package org.phenopackets.phenopackettools.util.format; + +import org.phenopackets.phenopackettools.core.PhenopacketToolsException; + +/** + * A checked exception thrown in case of encountering some content sniffing issues. 
+ */ +public class SniffException extends PhenopacketToolsException { + + public SniffException() { + super(); + } + + public SniffException(String message) { + super(message); + } + + public SniffException(String message, Throwable cause) { + super(message, cause); + } + + public SniffException(Throwable cause) { + super(cause); + } + + protected SniffException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java index 603769b1..9deb1fcd 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java @@ -31,4 +31,22 @@ static boolean looksLikeYaml(byte[] payload) { } } + static byte[] getFirstBytesAndReset(InputStream input, int nBytes) throws SniffException, IOException { + if (input.markSupported()) { + byte[] buffer = new byte[nBytes]; + input.mark(nBytes); + int read = input.read(buffer); + if (read < nBytes) { + // We explode because there are not enough bytes available for format sniffing. + String message = read < 0 + ? 
"The stream must not be at the end" + : "Need at least %d bytes to sniff the format but only %d was available".formatted(nBytes, read); + throw new SniffException(message); + } + input.reset(); + return buffer; + } else + throw new SniffException("The provided InputStream does not support `mark()`"); + + } } diff --git a/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java index 5a1200fb..3321e094 100644 --- a/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java +++ b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java @@ -2,6 +2,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; import java.io.BufferedInputStream; import java.io.IOException; diff --git a/phenopacket-tools-validator-core/pom.xml b/phenopacket-tools-validator-core/pom.xml index 50d38a2b..52bb7664 100644 --- a/phenopacket-tools-validator-core/pom.xml +++ b/phenopacket-tools-validator-core/pom.xml @@ -16,6 +16,11 @@ Validator utilities for phenopackets + + org.phenopackets.phenopackettools + phenopacket-tools-core + ${project.parent.version} + org.phenopackets phenopacket-schema diff --git a/phenopacket-tools-validator-core/src/main/java/module-info.java b/phenopacket-tools-validator-core/src/main/java/module-info.java index 1abaea74..b64606c4 100644 --- a/phenopacket-tools-validator-core/src/main/java/module-info.java +++ b/phenopacket-tools-validator-core/src/main/java/module-info.java @@ -6,6 +6,7 @@ exports org.phenopackets.phenopackettools.validator.core.phenotype; exports org.phenopackets.phenopackettools.validator.core.writer; + requires org.phenopackets.phenopackettools.core; requires 
org.monarchinitiative.phenol.core; requires org.phenopackets.schema; diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java index d8f13cc4..6d4250f7 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java @@ -1,9 +1,11 @@ package org.phenopackets.phenopackettools.validator.core; +import org.phenopackets.phenopackettools.core.PhenopacketToolsException; + /** - * An {@link Exception} that is thrown in case the provided data has incorrect format. + * A {@link PhenopacketToolsException} that is thrown in case the provided data has incorrect format. */ -public class ConversionException extends Exception implements ValidationResult { +public class ConversionException extends PhenopacketToolsException implements ValidationResult { private static final String VALIDATION_CATEGORY = "input"; diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorException.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorException.java index c55bc234..a4ea709a 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorException.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorException.java @@ -1,6 +1,8 @@ package org.phenopackets.phenopackettools.validator.core.except; -public class PhenopacketValidatorException extends Exception { +import 
org.phenopackets.phenopackettools.core.PhenopacketToolsException; + +public class PhenopacketValidatorException extends PhenopacketToolsException { public PhenopacketValidatorException() { super(); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorRuntimeException.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorRuntimeException.java index 663d3e15..30f728d0 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorRuntimeException.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/PhenopacketValidatorRuntimeException.java @@ -1,6 +1,8 @@ package org.phenopackets.phenopackettools.validator.core.except; -public class PhenopacketValidatorRuntimeException extends RuntimeException { +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; + +public class PhenopacketValidatorRuntimeException extends PhenopacketToolsRuntimeException { public PhenopacketValidatorRuntimeException() { super(); diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java index ae1ee8e2..3bfd5bdf 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java @@ -5,7 +5,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.protobuf.MessageOrBuilder; 
import org.phenopackets.phenopackettools.util.format.FormatSniffer; -import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; import org.phenopackets.phenopackettools.util.format.FormatSniffException; import org.phenopackets.phenopackettools.validator.core.*; import org.phenopackets.phenopackettools.validator.jsonschema.impl.JsonSchemaValidator; diff --git a/pom.xml b/pom.xml index b4c37468..ef98b259 100644 --- a/pom.xml +++ b/pom.xml @@ -15,12 +15,14 @@ + phenopacket-tools-core phenopacket-tools-test phenopacket-tools-util phenopacket-tools-builder phenopacket-tools-validator-core phenopacket-tools-validator-jsonschema phenopacket-tools-converter + phenopacket-tools-io phenopacket-tools-cli @@ -84,9 +86,12 @@ UTF-8 UTF-8 17 - 3.21.1 + 3.21.8 2.0.2 - 2.13.3 + 2.13.4.2 + + 2.13.4 + 1.33 2.0.0-RC3 5.7.1 @@ -149,18 +154,18 @@ com.fasterxml.jackson.core - jackson-databind - ${jackson.version} + jackson-core + ${jackson.core.version} com.fasterxml.jackson.core - jackson-core + jackson-databind ${jackson.version} - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + ${jackson.core.version} com.google.guava @@ -187,6 +192,11 @@ phenol-io ${phenol.version} + + org.yaml + snakeyaml + ${snakeyaml.version} + org.apache.commons commons-csv From 13afe51c39b28019e4ab8d06f45c0a96bb68756b Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 1 Nov 2022 09:04:28 -0400 Subject: [PATCH 20/79] Setup stubs for element sniffing. 
Signed-off-by: Daniel Danis --- .../command/BaseIOCommand.java | 8 ++-- .../src/main/java/module-info.java | 1 + .../util/format/ElementSniffer.java | 46 +++++++++++++++++-- .../util/format/ElementSnifferTest.java | 11 +++++ 4 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/ElementSnifferTest.java diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java index 763c4d67..a374be8b 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java @@ -69,7 +69,7 @@ protected List readMessagesOrExit(PhenopacketSchemaVersion schem // The user did not set `-i | --input` option, assuming a single input is coming from STDIN. 
InputStream is = System.in; try { - setFormatAndElement(is); + setFormatAndElement(is, schemaVersion); Message message = parser.parse(inputSection.format, inputSection.element, is); return List.of(new MessageAndPath(message, null)); } catch (SniffException e) { @@ -87,7 +87,7 @@ protected List readMessagesOrExit(PhenopacketSchemaVersion schem List messages = new ArrayList<>(); for (Path input : inputSection.inputs) { try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) { - setFormatAndElement(is); + setFormatAndElement(is, schemaVersion); Message message = parser.parse(inputSection.format, inputSection.element, is); messages.add(new MessageAndPath(message, input)); } catch (SniffException e) { @@ -110,7 +110,7 @@ protected List readMessagesOrExit(PhenopacketSchemaVersion schem * @throws IOException if I/O error happens * @throws SniffException if we cannot sniff the format */ - private void setFormatAndElement(InputStream is) throws IOException, SniffException { + private void setFormatAndElement(InputStream is, PhenopacketSchemaVersion schemaVersion) throws IOException, SniffException { // Set format. PhenopacketFormat fmt = FormatSniffer.sniff(is); if (inputSection.format == null) { @@ -126,7 +126,7 @@ private void setFormatAndElement(InputStream is) throws IOException, SniffExcept } // Set element. 
- PhenopacketElement element = ElementSniffer.sniff(is); + PhenopacketElement element = ElementSniffer.sniff(is, schemaVersion, inputSection.format); if (inputSection.element == null) { LOGGER.info("Input element type (-e | --element) was not provided, making an educated guess.."); LOGGER.info("The input looks like a {} ", element); diff --git a/phenopacket-tools-util/src/main/java/module-info.java b/phenopacket-tools-util/src/main/java/module-info.java index 64b46143..f316d6a6 100644 --- a/phenopacket-tools-util/src/main/java/module-info.java +++ b/phenopacket-tools-util/src/main/java/module-info.java @@ -1,5 +1,6 @@ module org.phenopackets.phenopackettools.util { requires transitive org.phenopackets.phenopackettools.core; + requires org.slf4j; exports org.phenopackets.phenopackettools.util.format; } \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java index 56d756fd..acad085e 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java @@ -1,6 +1,10 @@ package org.phenopackets.phenopackettools.util.format; import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; @@ -11,6 +15,9 @@ */ public class ElementSniffer { + // Remove SLF4J from module-info if we omit logging. + private static final Logger LOGGER = LoggerFactory.getLogger(ElementSniffer.class); + /** * The number of bytes used for element sniffing. 
*/ @@ -22,27 +29,58 @@ private ElementSniffer() { /** * Make an educated guess of {@link PhenopacketElement} present in given {@code input}. * - * @param input an {@link InputStream} that supports {@link InputStream#mark(int)}. + * @param input an {@link InputStream} that supports {@link InputStream#mark(int)}. + * @param format the {@code payload} format * @return the sniffed {@link PhenopacketElement}. * @throws IOException in case an error occurs while reading the {@code input}. * @throws SniffException if there are not enough bytes available in the {@code input} of if the {@code input} does not * support {@link InputStream#mark(int)}. */ - public static PhenopacketElement sniff(InputStream input) throws IOException, SniffException { - return sniff(Util.getFirstBytesAndReset(input, BUFFER_SIZE)); + public static PhenopacketElement sniff(InputStream input, + PhenopacketSchemaVersion schemaVersion, + PhenopacketFormat format) throws IOException, SniffException { + return sniff(Util.getFirstBytesAndReset(input, BUFFER_SIZE), schemaVersion, format); } /** * Make an educated guess of {@link PhenopacketElement} based on given {@code payload}. * * @param payload buffer with at least the first {@link #BUFFER_SIZE} bytes of the input. + * @param format the {@code payload} format * @return the sniffed {@link PhenopacketElement}. * @throws ElementSniffException if {@code payload} contains less than {@link #BUFFER_SIZE} bytes. 
*/ - public static PhenopacketElement sniff(byte[] payload) throws ElementSniffException { + public static PhenopacketElement sniff(byte[] payload, + PhenopacketSchemaVersion schemaVersion, + PhenopacketFormat format) throws ElementSniffException { if (payload.length < BUFFER_SIZE) throw new ElementSniffException("Need at least %d bytes to sniff but got %d".formatted(BUFFER_SIZE, payload.length)); + + return switch (format) { + case PROTOBUF -> sniffProtobuf(payload, schemaVersion); + case JSON -> sniffJson(payload, schemaVersion); + case YAML -> sniffYaml(payload, schemaVersion); + }; + } + + private static PhenopacketElement sniffProtobuf(byte[] payload, PhenopacketSchemaVersion schemaVersion) { + // TODO - implement + LOGGER.warn("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); + return PhenopacketElement.PHENOPACKET; + } + + private static PhenopacketElement sniffJson(byte[] payload, PhenopacketSchemaVersion schemaVersion) { + // TODO - implement + // TODO - reconsider the sniffing workflow. In case of loosely defined formats like JSON and YAML, + // the fields can be in any order and we may not get enough information. + // Is it OK to throw upon sniffing failure, or is returning an Optional enough?
+ LOGGER.warn("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); + return PhenopacketElement.PHENOPACKET; + } + + private static PhenopacketElement sniffYaml(byte[] payload, PhenopacketSchemaVersion schemaVersion) { // TODO - implement + LOGGER.warn("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); return PhenopacketElement.PHENOPACKET; } } diff --git a/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/ElementSnifferTest.java b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/ElementSnifferTest.java new file mode 100644 index 00000000..22a095cd --- /dev/null +++ b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/ElementSnifferTest.java @@ -0,0 +1,11 @@ +package org.phenopackets.phenopackettools.util.format; + +import static org.junit.jupiter.api.Assertions.*; +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class ElementSnifferTest { + + // TODO - implement + +} \ No newline at end of file From 401ab4f1fab13b349ffef7a97b15e4eca8fd1938 Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Tue, 1 Nov 2022 12:31:51 -0400 Subject: [PATCH 21/79] Adding response constants --- constants/Response.tsv | 15 ++++++++-- docs/constants.rst | 15 ++++++++-- .../builder/constants/Response.java | 30 ++++++++++++++++--- .../AtaxiaWithVitaminEdeficiency.java | 4 +-- ...SevereStatinInducedAutoimmuneMyopathy.java | 4 +-- 5 files changed, 56 insertions(+), 12 deletions(-) diff --git a/constants/Response.tsv b/constants/Response.tsv index f2f9b3b9..ae2fd96e 100644 --- a/constants/Response.tsv +++ b/constants/Response.tsv @@ -1,3 +1,14 @@ ontology.id ontology.label variable.name function.name -NCIT:C102560 Favorable FAVORABLE favorable -NCIT:C102561 Unfavorable UNFAVORABLE unfavorable +NCIT:C123584 Favorable Response FAVORABLE_RESPONSE favorableResponse +NCIT:C123617 Unfavorable Response 
UNFAVORABLE_RESPONSE unfavorableResponse +NCIT:C123600 No Response NO_RESPONSE noResponse +NCIT:C123614 Stringent Complete Response STRINGENT_COMPLETE_RESPONSE stringentCompleteResponse +NCIT:C123598 Minimal Response MINIMAL_RESPONSE minimalResponse +NCIT:C4870 Complete Remission COMPLETE_REMISSION completeRemission +NCIT:C18058 Partial Remission PARTIAL_REMISSION partialRemission +NCIT:C70604 Primary Refractory PRIMARY_REFRACTORY primaryRefractory +NCIT:C142357 iRECIST Complete Response iRECIST_COMPLETE_RESPONSE iRECISTCompleteResponse +NCIT:C142356 iRECIST Confirmed Progressive Disease iRECIST_CONFIRMED_PROGRESSIVE_DISEASE iRECISTConfirmedProgressiveDisease +NCIT:C142358 iRECIST Partial Response iRECIST_PARTIAL_RESPONSE iRECISTPartialResponse +NCIT:C142359 iRECIST Stable Disease iRECIST_STABLE_DISEASE iRECISTStableDisease +NCIT:C142360 iRECIST Unconfirmed Progressive Disease iRECIST_UNCONFIRMED_PROGRESSIVE_DISEASE iRECISTUnconfirmedProgressiveDisease diff --git a/docs/constants.rst b/docs/constants.rst index 2c3818eb..9c666817 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -168,8 +168,19 @@ These codes from `NCI Thesaurus `_ ca :header: "id", "label", "function name" :widths: 30, 200, 200 - "NCIT:C102560", "Favorable", "favorable()" - "NCIT:C102561", "Unfavorable", "unfavorable()" + "NCIT:C123584", "Favorable Response", "favorableResponse()" + "NCIT:C123617", "Unfavorable Response", "unfavorableResponse()" + "NCIT:C123600", "No Response", "noResponse()" + "NCIT:C123614", "Stringent Complete Response", "stringentCompleteResponse()" + "NCIT:C123598", "Minimal Response", "minimalResponse()" + "NCIT:C4870", "Complete Remission", "completeRemission()" + "NCIT:C18058", "Partial Remission", "partialRemission()" + "NCIT:C70604", "Primary Refractory", "primaryRefractory()" + "NCIT:C142357", "iRECIST Complete Response", "iRECISTCompleteResponse()" + "NCIT:C142356", "iRECIST Confirmed Progressive Disease", "iRECISTConfirmedProgressiveDisease()" + 
"NCIT:C142358", "iRECIST Partial Response", "iRECISTPartialResponse()" + "NCIT:C142359", "iRECIST Stable Disease", "iRECISTStableDisease()" + "NCIT:C142360", "iRECIST Unconfirmed Progressive Disease", "iRECISTUnconfirmedProgressiveDisease()" SpatialPattern diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java index dec48018..71e7a47e 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java @@ -5,11 +5,33 @@ public class Response { - private static final OntologyClass FAVORABLE = OntologyClassBuilder.ontologyClass("NCIT:C102560", "Favorable"); - private static final OntologyClass UNFAVORABLE = OntologyClassBuilder.ontologyClass("NCIT:C102561", "Unfavorable"); + private static final OntologyClass FAVORABLE_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123584", "Favorable Response"); + private static final OntologyClass UNFAVORABLE_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123617", "Unfavorable Response"); + private static final OntologyClass NO_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123600", "No Response"); + private static final OntologyClass STRINGENT_COMPLETE_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123614", "Stringent Complete Response"); + private static final OntologyClass MINIMAL_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C123598", "Minimal Response"); + private static final OntologyClass COMPLETE_REMISSION = OntologyClassBuilder.ontologyClass("NCIT:C4870", "Complete Remission"); + private static final OntologyClass PARTIAL_REMISSION = OntologyClassBuilder.ontologyClass("NCIT:C18058", "Partial Remission"); + private static final OntologyClass PRIMARY_REFRACTORY = 
OntologyClassBuilder.ontologyClass("NCIT:C70604", "Primary Refractory"); + private static final OntologyClass iRECIST_COMPLETE_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C142357", "iRECIST Complete Response"); + private static final OntologyClass iRECIST_CONFIRMED_PROGRESSIVE_DISEASE = OntologyClassBuilder.ontologyClass("NCIT:C142356", "iRECIST Confirmed Progressive Disease"); + private static final OntologyClass iRECIST_PARTIAL_RESPONSE = OntologyClassBuilder.ontologyClass("NCIT:C142358", "iRECIST Partial Response"); + private static final OntologyClass iRECIST_STABLE_DISEASE = OntologyClassBuilder.ontologyClass("NCIT:C142359", "iRECIST Stable Disease"); + private static final OntologyClass iRECIST_UNCONFIRMED_PROGRESSIVE_DISEASE = OntologyClassBuilder.ontologyClass("NCIT:C142360", "iRECIST Unconfirmed Progressive Disease"); - public static OntologyClass favorable() { return FAVORABLE; } - public static OntologyClass unfavorable() { return UNFAVORABLE; } + public static OntologyClass favorableResponse() { return FAVORABLE_RESPONSE; } + public static OntologyClass unfavorableResponse() { return UNFAVORABLE_RESPONSE; } + public static OntologyClass noResponse() { return NO_RESPONSE; } + public static OntologyClass stringentCompleteResponse() { return STRINGENT_COMPLETE_RESPONSE; } + public static OntologyClass minimalResponse() { return MINIMAL_RESPONSE; } + public static OntologyClass completeRemission() { return COMPLETE_REMISSION; } + public static OntologyClass partialRemission() { return PARTIAL_REMISSION; } + public static OntologyClass primaryRefractory() { return PRIMARY_REFRACTORY; } + public static OntologyClass iRECISTCompleteResponse() { return iRECIST_COMPLETE_RESPONSE; } + public static OntologyClass iRECISTConfirmedProgressiveDisease() { return iRECIST_CONFIRMED_PROGRESSIVE_DISEASE; } + public static OntologyClass iRECISTPartialResponse() { return iRECIST_PARTIAL_RESPONSE; } + public static OntologyClass iRECISTStableDisease() { return 
iRECIST_STABLE_DISEASE; } + public static OntologyClass iRECISTUnconfirmedProgressiveDisease() { return iRECIST_UNCONFIRMED_PROGRESSIVE_DISEASE; } } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java index 11219cf0..7ad3d7c5 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java @@ -10,7 +10,7 @@ import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; import static org.phenopackets.phenopackettools.builder.constants.Assays.creatineKinaseActivity; -import static org.phenopackets.phenopackettools.builder.constants.Response.favorable; +import static org.phenopackets.phenopackettools.builder.constants.Response.favorableResponse; import static org.phenopackets.phenopackettools.builder.constants.SpatialPattern.generalized; import static org.phenopackets.phenopackettools.builder.constants.Unit.*; @@ -217,7 +217,7 @@ private MedicalAction vitaminEtreatment() { OntologyClass vitE = ontologyClass("DrugCentral:257", "Vitamin E"); TreatmentBuilder tbuilder = TreatmentBuilder.oralAdministration(vitE); return MedicalActionBuilder.builder(tbuilder.build()) - .responseToTreatment(favorable()) + .responseToTreatment(favorableResponse()) .build(); } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java index 36ebadcf..e5230a27 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java +++ 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java @@ -9,7 +9,7 @@ import static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; import static org.phenopackets.phenopackettools.builder.constants.MedicalActions.*; -import static org.phenopackets.phenopackettools.builder.constants.Response.favorable; +import static org.phenopackets.phenopackettools.builder.constants.Response.favorableResponse; import static org.phenopackets.phenopackettools.builder.constants.Unit.*; /** @@ -110,7 +110,7 @@ private MedicalAction treatment() { TimeInterval interval = TimeIntervalBuilder.of("2020-09-02", "2021-03-02"); return MedicalActionBuilder .intravenousAdministration(ivIg, quantity, everySixWeeks, interval) - .responseToTreatment(favorable()) + .responseToTreatment(favorableResponse()) .build(); } From 4dbbcbb094dc39090310e8818141e7f7715fe13c Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Tue, 1 Nov 2022 12:50:38 -0400 Subject: [PATCH 22/79] pathological TNM staging constants --- constants/pathologicalTnm.tsv | 37 +++++++++++++++++++++++++++ constants/rtd_texts.txt | 3 ++- docs/constants.rst | 47 +++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 constants/pathologicalTnm.tsv diff --git a/constants/pathologicalTnm.tsv b/constants/pathologicalTnm.tsv new file mode 100644 index 00000000..4f4573cd --- /dev/null +++ b/constants/pathologicalTnm.tsv @@ -0,0 +1,37 @@ +ontology.id ontology.label variable.name function.name +NCIT:C48740 pM0 Stage Finding PM0_STAGE_FINDING pM0StageFinding +NCIT:C48741 pM1 Stage Finding PM1_STAGE_FINDING pM1StageFinding +NCIT:C48742 pM1a Stage Finding PM1A_STAGE_FINDING pM1aStageFinding +NCIT:C48743 pM1b Stage Finding PM1B_STAGE_FINDING pM1bStageFinding +NCIT:C48744 pM1c Stage Finding PM1C_STAGE_FINDING pM1cStageFinding +NCIT:C48745 pN0 Stage Finding PN0_STAGE_FINDING pN0StageFinding 
+NCIT:C48746 pN1 Stage Finding PN1_STAGE_FINDING pN1StageFinding +NCIT:C48747 pN1a Stage Finding PN1A_STAGE_FINDING pN1aStageFinding +NCIT:C48748 pN1b Stage Finding PN1B_STAGE_FINDING pN1bStageFinding +NCIT:C48749 pN1c Stage Finding PN1C_STAGE_FINDING pN1cStageFinding +NCIT:C48750 pN2 Stage Finding PN2_STAGE_FINDING pN2StageFinding +NCIT:C48751 pN2a Stage Finding PN2A_STAGE_FINDING pN2aStageFinding +NCIT:C48752 pN2b Stage Finding PN2B_STAGE_FINDING pN2bStageFinding +NCIT:C48753 pN2c Stage Finding PN2C_STAGE_FINDING pN2cStageFinding +NCIT:C48754 pN3 Stage Finding PN3_STAGE_FINDING pN3StageFinding +NCIT:C48755 pN3a Stage Finding PN3A_STAGE_FINDING pN3aStageFinding +NCIT:C48756 pN3b Stage Finding PN3B_STAGE_FINDING pN3bStageFinding +NCIT:C48757 pN3c Stage Finding PN3C_STAGE_FINDING pN3cStageFinding +NCIT:C48758 pT0 Stage Finding PT0_STAGE_FINDING pT0StageFinding +NCIT:C48759 pT1 Stage Finding PT1_STAGE_FINDING pT1StageFinding +NCIT:C48760 pT1a Stage Finding PT1A_STAGE_FINDING pT1aStageFinding +NCIT:C48761 pT1b Stage Finding PT1B_STAGE_FINDING pT1bStageFinding +NCIT:C48763 pT1c Stage Finding PT1C_STAGE_FINDING pT1cStageFinding +NCIT:C48764 pT2 Stage Finding PT2_STAGE_FINDING pT2StageFinding +NCIT:C48765 pT2a Stage Finding PT2A_STAGE_FINDING pT2aStageFinding +NCIT:C48766 pT2b Stage Finding PT2B_STAGE_FINDING pT2bStageFinding +NCIT:C48767 pT2c Stage Finding PT2C_STAGE_FINDING pT2cStageFinding +NCIT:C48768 pT3 Stage Finding PT3_STAGE_FINDING pT3StageFinding +NCIT:C48769 pT3a Stage Finding PT3A_STAGE_FINDING pT3aStageFinding +NCIT:C48770 pT3b Stage Finding PT3B_STAGE_FINDING pT3bStageFinding +NCIT:C48771 pT3c Stage Finding PT3C_STAGE_FINDING pT3cStageFinding +NCIT:C48772 pT4 Stage Finding PT4_STAGE_FINDING pT4StageFinding +NCIT:C48773 pT4a Stage Finding PT4A_STAGE_FINDING pT4aStageFinding +NCIT:C48774 pT4b Stage Finding PT4B_STAGE_FINDING pT4bStageFinding +NCIT:C48775 pT4c Stage Finding PT4C_STAGE_FINDING pT4cStageFinding +NCIT:C48776 pT4d Stage Finding PT4D_STAGE_FINDING 
pT4dStageFinding diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index cb6771ce..46e28b9a 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -9,4 +9,5 @@ SpatialPattern|Modifier terms from the `HPO `_ are use Unit|With some exceptions, terms from the `The Unified Code for Units of Measure `_ are used to denote units. Response|These codes from `NCI Thesaurus `_ can be used to code the overall response of a patient to treatment. Favorable and Unfavorble can be used for general purposes and the remaining codes are intended to be used for oncology. Assays|If possible, `LOINC `_ codes should be used to specify laboratory test assays. -Gender|`LOINC `_ codes should be used to specify self-reported gender. \ No newline at end of file +Gender|`LOINC `_ codes should be used to specify self-reported gender. +pathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). \ No newline at end of file diff --git a/docs/constants.rst b/docs/constants.rst index 9c666817..f39a6bfc 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -262,3 +262,50 @@ With some exceptions, terms from the `The Unified Code for Units of Measure Date: Tue, 1 Nov 2022 13:34:15 -0400 Subject: [PATCH 23/79] correcting name of var and adding documentation for GeneDescriptor --- .../builders/GeneDescriptorBuilder.java | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/GeneDescriptorBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/GeneDescriptorBuilder.java index 6c281190..2eb52c61 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/GeneDescriptorBuilder.java +++ 
b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/GeneDescriptorBuilder.java @@ -8,16 +8,27 @@ public class GeneDescriptorBuilder { private final GeneDescriptor.Builder builder; - private GeneDescriptorBuilder(String identifier, String symbol) { - builder = GeneDescriptor.newBuilder().setValueId(identifier).setSymbol(symbol); + private GeneDescriptorBuilder(String valueId, String symbol) { + builder = GeneDescriptor.newBuilder().setValueId(valueId).setSymbol(symbol); } - public static GeneDescriptor of(String identifier, String symbol) { - return GeneDescriptor.newBuilder().setValueId(identifier).setSymbol(symbol).build(); + /** + * @param valueId Official identifier of the gene, e.g., HGNC:3603 + * @param symbol Official gene symbol, e.g., FBN1 + * @return completely built {@link GeneDescriptor} object + */ + public static GeneDescriptor of(String valueId, String symbol) { + return GeneDescriptor.newBuilder().setValueId(valueId).setSymbol(symbol).build(); } - public static GeneDescriptorBuilder builder(String identifier, String symbol) { - return new GeneDescriptorBuilder(identifier, symbol); + + /** + * @param valueId Official identifier of the gene, e.g., HGNC:3603 + * @param symbol Official gene symbol, e.g., FBN1 + * @return GeneDescriptorBuilder that can be used to set additional field values + */ + public static GeneDescriptorBuilder builder(String valueId, String symbol) { + return new GeneDescriptorBuilder(valueId, symbol); } public GeneDescriptorBuilder description(String desc) { From 4e1a343328abe928516b88fbc08ff5292a93d2c9 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 1 Nov 2022 15:15:19 -0400 Subject: [PATCH 24/79] Change `pathologicalTnm` to `PathologicalTnm`. 
Signed-off-by: Daniel Danis --- ...athologicalTnm.tsv => PathologicalTnm.tsv} | 0 constants/create_classes.py | 3 + constants/rtd_texts.txt | 2 +- docs/constants.rst | 94 +++++++++---------- 4 files changed, 51 insertions(+), 48 deletions(-) rename constants/{pathologicalTnm.tsv => PathologicalTnm.tsv} (100%) diff --git a/constants/pathologicalTnm.tsv b/constants/PathologicalTnm.tsv similarity index 100% rename from constants/pathologicalTnm.tsv rename to constants/PathologicalTnm.tsv diff --git a/constants/create_classes.py b/constants/create_classes.py index ffd62bfd..52e19f8a 100644 --- a/constants/create_classes.py +++ b/constants/create_classes.py @@ -54,6 +54,9 @@ def name(self): def items(self): return self._constant_items + def __repr__(self): + return f"Entry name={self._name} {len(self._constant_items)} items" + def parse_csv(fname): if not isfile(fname): diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index 46e28b9a..31572502 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -10,4 +10,4 @@ Unit|With some exceptions, terms from the `The Unified Code for Units of Measure Response|These codes from `NCI Thesaurus `_ can be used to code the overall response of a patient to treatment. Favorable and Unfavorble can be used for general purposes and the remaining codes are intended to be used for oncology. Assays|If possible, `LOINC `_ codes should be used to specify laboratory test assays. Gender|`LOINC `_ codes should be used to specify self-reported gender. -pathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). \ No newline at end of file +PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). 
\ No newline at end of file diff --git a/docs/constants.rst b/docs/constants.rst index f39a6bfc..919e8f5e 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -159,6 +159,53 @@ Terms from the `UBERON ontology `_ "UBERON:0002370", "thymus", "thymus()" +PathologicalTnm +^^^^^^^^^^^^^^^ + +TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C48740", "pM0 Stage Finding", "pM0StageFinding()" + "NCIT:C48741", "pM1 Stage Finding", "pM1StageFinding()" + "NCIT:C48742", "pM1a Stage Finding", "pM1aStageFinding()" + "NCIT:C48743", "pM1b Stage Finding", "pM1bStageFinding()" + "NCIT:C48744", "pM1c Stage Finding", "pM1cStageFinding()" + "NCIT:C48745", "pN0 Stage Finding", "pN0StageFinding()" + "NCIT:C48746", "pN1 Stage Finding", "pN1StageFinding()" + "NCIT:C48747", "pN1a Stage Finding", "pN1aStageFinding()" + "NCIT:C48748", "pN1b Stage Finding", "pN1bStageFinding()" + "NCIT:C48749", "pN1c Stage Finding", "pN1cStageFinding()" + "NCIT:C48750", "pN2 Stage Finding", "pN2StageFinding()" + "NCIT:C48751", "pN2a Stage Finding", "pN2aStageFinding()" + "NCIT:C48752", "pN2b Stage Finding", "pN2bStageFinding()" + "NCIT:C48753", "pN2c Stage Finding", "pN2cStageFinding()" + "NCIT:C48754", "pN3 Stage Finding", "pN3StageFinding()" + "NCIT:C48755", "pN3a Stage Finding", "pN3aStageFinding()" + "NCIT:C48756", "pN3b Stage Finding", "pN3bStageFinding()" + "NCIT:C48757", "pN3c Stage Finding", "pN3cStageFinding()" + "NCIT:C48758", "pT0 Stage Finding", "pT0StageFinding()" + "NCIT:C48759", "pT1 Stage Finding", "pT1StageFinding()" + "NCIT:C48760", "pT1a Stage Finding", "pT1aStageFinding()" + "NCIT:C48761", "pT1b Stage Finding", "pT1bStageFinding()" + "NCIT:C48763", "pT1c Stage Finding", "pT1cStageFinding()" + "NCIT:C48764", "pT2 Stage Finding", "pT2StageFinding()" + "NCIT:C48765", "pT2a Stage Finding", "pT2aStageFinding()" + 
"NCIT:C48766", "pT2b Stage Finding", "pT2bStageFinding()" + "NCIT:C48767", "pT2c Stage Finding", "pT2cStageFinding()" + "NCIT:C48768", "pT3 Stage Finding", "pT3StageFinding()" + "NCIT:C48769", "pT3a Stage Finding", "pT3aStageFinding()" + "NCIT:C48770", "pT3b Stage Finding", "pT3bStageFinding()" + "NCIT:C48771", "pT3c Stage Finding", "pT3cStageFinding()" + "NCIT:C48772", "pT4 Stage Finding", "pT4StageFinding()" + "NCIT:C48773", "pT4a Stage Finding", "pT4aStageFinding()" + "NCIT:C48774", "pT4b Stage Finding", "pT4bStageFinding()" + "NCIT:C48775", "pT4c Stage Finding", "pT4cStageFinding()" + "NCIT:C48776", "pT4d Stage Finding", "pT4dStageFinding()" + + Response ^^^^^^^^ @@ -262,50 +309,3 @@ With some exceptions, terms from the `The Unified Code for Units of Measure Date: Tue, 1 Nov 2022 15:15:36 -0400 Subject: [PATCH 25/79] Generate the Java code. Signed-off-by: Daniel Danis --- .../builder/constants/PathologicalTnm.java | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java new file mode 100644 index 00000000..fa9b9d37 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java @@ -0,0 +1,83 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class PathologicalTnm { + + private static final OntologyClass PM0_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48740", "pM0 Stage Finding"); + private static final OntologyClass PM1_STAGE_FINDING = 
OntologyClassBuilder.ontologyClass("NCIT:C48741", "pM1 Stage Finding"); + private static final OntologyClass PM1A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48742", "pM1a Stage Finding"); + private static final OntologyClass PM1B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48743", "pM1b Stage Finding"); + private static final OntologyClass PM1C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48744", "pM1c Stage Finding"); + private static final OntologyClass PN0_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48745", "pN0 Stage Finding"); + private static final OntologyClass PN1_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48746", "pN1 Stage Finding"); + private static final OntologyClass PN1A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48747", "pN1a Stage Finding"); + private static final OntologyClass PN1B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48748", "pN1b Stage Finding"); + private static final OntologyClass PN1C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48749", "pN1c Stage Finding"); + private static final OntologyClass PN2_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48750", "pN2 Stage Finding"); + private static final OntologyClass PN2A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48751", "pN2a Stage Finding"); + private static final OntologyClass PN2B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48752", "pN2b Stage Finding"); + private static final OntologyClass PN2C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48753", "pN2c Stage Finding"); + private static final OntologyClass PN3_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48754", "pN3 Stage Finding"); + private static final OntologyClass PN3A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48755", "pN3a Stage Finding"); + private static final OntologyClass PN3B_STAGE_FINDING = 
OntologyClassBuilder.ontologyClass("NCIT:C48756", "pN3b Stage Finding"); + private static final OntologyClass PN3C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48757", "pN3c Stage Finding"); + private static final OntologyClass PT0_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48758", "pT0 Stage Finding"); + private static final OntologyClass PT1_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48759", "pT1 Stage Finding"); + private static final OntologyClass PT1A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48760", "pT1a Stage Finding"); + private static final OntologyClass PT1B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48761", "pT1b Stage Finding"); + private static final OntologyClass PT1C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48763", "pT1c Stage Finding"); + private static final OntologyClass PT2_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48764", "pT2 Stage Finding"); + private static final OntologyClass PT2A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48765", "pT2a Stage Finding"); + private static final OntologyClass PT2B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48766", "pT2b Stage Finding"); + private static final OntologyClass PT2C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48767", "pT2c Stage Finding"); + private static final OntologyClass PT3_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48768", "pT3 Stage Finding"); + private static final OntologyClass PT3A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48769", "pT3a Stage Finding"); + private static final OntologyClass PT3B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48770", "pT3b Stage Finding"); + private static final OntologyClass PT3C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48771", "pT3c Stage Finding"); + private static final OntologyClass PT4_STAGE_FINDING = 
OntologyClassBuilder.ontologyClass("NCIT:C48772", "pT4 Stage Finding"); + private static final OntologyClass PT4A_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48773", "pT4a Stage Finding"); + private static final OntologyClass PT4B_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48774", "pT4b Stage Finding"); + private static final OntologyClass PT4C_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48775", "pT4c Stage Finding"); + private static final OntologyClass PT4D_STAGE_FINDING = OntologyClassBuilder.ontologyClass("NCIT:C48776", "pT4d Stage Finding"); + + + public static OntologyClass pM0StageFinding() { return PM0_STAGE_FINDING; } + public static OntologyClass pM1StageFinding() { return PM1_STAGE_FINDING; } + public static OntologyClass pM1aStageFinding() { return PM1A_STAGE_FINDING; } + public static OntologyClass pM1bStageFinding() { return PM1B_STAGE_FINDING; } + public static OntologyClass pM1cStageFinding() { return PM1C_STAGE_FINDING; } + public static OntologyClass pN0StageFinding() { return PN0_STAGE_FINDING; } + public static OntologyClass pN1StageFinding() { return PN1_STAGE_FINDING; } + public static OntologyClass pN1aStageFinding() { return PN1A_STAGE_FINDING; } + public static OntologyClass pN1bStageFinding() { return PN1B_STAGE_FINDING; } + public static OntologyClass pN1cStageFinding() { return PN1C_STAGE_FINDING; } + public static OntologyClass pN2StageFinding() { return PN2_STAGE_FINDING; } + public static OntologyClass pN2aStageFinding() { return PN2A_STAGE_FINDING; } + public static OntologyClass pN2bStageFinding() { return PN2B_STAGE_FINDING; } + public static OntologyClass pN2cStageFinding() { return PN2C_STAGE_FINDING; } + public static OntologyClass pN3StageFinding() { return PN3_STAGE_FINDING; } + public static OntologyClass pN3aStageFinding() { return PN3A_STAGE_FINDING; } + public static OntologyClass pN3bStageFinding() { return PN3B_STAGE_FINDING; } + public static OntologyClass pN3cStageFinding() { 
return PN3C_STAGE_FINDING; } + public static OntologyClass pT0StageFinding() { return PT0_STAGE_FINDING; } + public static OntologyClass pT1StageFinding() { return PT1_STAGE_FINDING; } + public static OntologyClass pT1aStageFinding() { return PT1A_STAGE_FINDING; } + public static OntologyClass pT1bStageFinding() { return PT1B_STAGE_FINDING; } + public static OntologyClass pT1cStageFinding() { return PT1C_STAGE_FINDING; } + public static OntologyClass pT2StageFinding() { return PT2_STAGE_FINDING; } + public static OntologyClass pT2aStageFinding() { return PT2A_STAGE_FINDING; } + public static OntologyClass pT2bStageFinding() { return PT2B_STAGE_FINDING; } + public static OntologyClass pT2cStageFinding() { return PT2C_STAGE_FINDING; } + public static OntologyClass pT3StageFinding() { return PT3_STAGE_FINDING; } + public static OntologyClass pT3aStageFinding() { return PT3A_STAGE_FINDING; } + public static OntologyClass pT3bStageFinding() { return PT3B_STAGE_FINDING; } + public static OntologyClass pT3cStageFinding() { return PT3C_STAGE_FINDING; } + public static OntologyClass pT4StageFinding() { return PT4_STAGE_FINDING; } + public static OntologyClass pT4aStageFinding() { return PT4A_STAGE_FINDING; } + public static OntologyClass pT4bStageFinding() { return PT4B_STAGE_FINDING; } + public static OntologyClass pT4cStageFinding() { return PT4C_STAGE_FINDING; } + public static OntologyClass pT4dStageFinding() { return PT4D_STAGE_FINDING; } + +} From bc450a63ed883ac9ab80e96c4ab74e02259a11cc Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 1 Nov 2022 16:27:48 -0400 Subject: [PATCH 26/79] Proofread VRSATILE JSON schema specification. 
Signed-off-by: Daniel Danis --- .../validator/jsonschema/vrsatile.json | 67 ++++++++++++------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json index 217883d1..1af561e1 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json @@ -1,8 +1,8 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema#", "$id": "https://www.ga4gh.org/phenopackets", - "title": "Phenopacket", - "description": "Schema for Global Alliance for Genomics and Health (GA4GH) Phenopacket", + "title": "VRS Added Tools for Interoperable Loquacious Exchange", + "description": "VRSATILE: A set of proposed extensions for GA4GH's Variation Representation Specification (VRS) to enable interoperable exchange of common descriptive data alongside variation concepts", "type": "object", "definitions": { "extension": { @@ -97,17 +97,26 @@ "type": "string" }, "variation": { - "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json#/definitions/Variation" + "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json#/definitions/Variation", + "description": "The VRS Variation object" }, "label": { - "type": "string" + "type": "string", + "description": "A primary label for the variation" + }, + "description": { + "type": "string", + "description": "A free-text description of the variation" + }, + "geneContext": { + "$ref": "#/definitions/geneDescriptor", + "description": "A specific gene context that applies to this variant" }, "expressions": { "type": "array", "items": { "$ref": 
"#/definitions/expression" - }, - "minItems": 0 + } }, "vcfRecord": { "$ref": "#/definitions/vcfRecord" @@ -116,22 +125,19 @@ "type": "array", "items": { "type": "string" - }, - "minItems": 0 + } }, "alternateLabels": { "type": "array", "items": { "type": "string" - }, - "minItems": 0 + } }, "extensions": { "type": "array", "items": { "$ref": "#/definitions/extension" - }, - "minItems": 0 + } }, "moleculeContext": { "enum": [ @@ -142,16 +148,21 @@ ] }, "structuralType" : { - "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/v2/base.json#/definitions/ontologyClass" + "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/v2/base.json#/definitions/ontologyClass", + "description": "The structural variant type associated with this variant, such as a substitution, deletion, or fusion. We RECOMMEND using a descendent term of SO:0001537." }, - "vrs_ref_allele_seq": { + "vrsRefAlleleSeq": { "type": "string", - "description": "A Sequence corresponding to a “ref allele”, describing the sequence expected at a SequenceLocation reference" + "description": "A Sequence corresponding to a \"ref allele\", describing the sequence expected at a SequenceLocation reference" }, "allelicState" : { "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/v2/base.json#/definitions/ontologyClass" } - } + }, + "required": [ + "id", "moleculeContext" + ], + "additionalProperties": false }, "geneDescriptor" : { "type": "object", @@ -166,30 +177,34 @@ }, "description": { "type": "string", - "description": "Free-text description" + "description": "Free-text description of the gene" }, "alternateIds" : { "type": "array", + "description": "Alternative identifier(s) of the gene", "items": { "type": "string" - }, - "minItems": 0 + } }, - "alternateSymbols" : { + "xrefs" : { "type": "array", + "description": "Related concept IDs (e.g. 
gene ortholog IDs) may be placed in xrefs", "items": { "type": "string" - }, - "minItems": 0 + } }, - "xrefs" : { + "alternateSymbols" : { "type": "array", + "description": "Alternative symbol(s) of the gene", "items": { "type": "string" - }, - "minItems": 0 + } } - } + }, + "required": [ + "valueId", "symbol" + ], + "additionalProperties": false } } } \ No newline at end of file From 56a53d572a9f5e94f53d12dfcccba661cf045c2d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 1 Nov 2022 16:33:22 -0400 Subject: [PATCH 27/79] Add the required `MoleculeContext` field into the test phenopacket, family, and cohort. Signed-off-by: Daniel Danis --- .../phenopackettools/validator/jsonschema/bethlem-myopathy.json | 1 + .../phenopackettools/validator/jsonschema/example-cohort.json | 1 + .../phenopackettools/validator/jsonschema/example-family.json | 1 + 3 files changed, 3 insertions(+) diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json index 1cff08fe..6be501a1 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/bethlem-myopathy.json @@ -378,6 +378,7 @@ "therapeuticActionability": "ACTIONABLE", "variationDescriptor": { "id": "variant id", + "moleculeContext": "transcript", "expressions": [ { "syntax": "hgvs", diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json index a5517087..c55001a7 100644 --- 
a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-cohort.json @@ -378,6 +378,7 @@ "therapeuticActionability": "ACTIONABLE", "variationDescriptor": { "id": "variant id", + "moleculeContext": "transcript", "expressions": [ { "syntax": "hgvs", diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json index 54cf1aa8..6c30d25f 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/example-family.json @@ -377,6 +377,7 @@ "therapeuticActionability": "ACTIONABLE", "variationDescriptor": { "id": "variant id", + "moleculeContext": "transcript", "expressions": [ { "syntax": "hgvs", From 962de6ec8e4a274f9e9c811f14e444ef7d65bfcb Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 1 Nov 2022 23:37:18 -0400 Subject: [PATCH 28/79] Draft organ system HPO validator. 
Signed-off-by: Daniel Danis --- .../orgsys/AbstractOrganSystemValidator.java | 117 ++++++++++++++++++ .../orgsys/CohortHpoOrganSystemValidator.java | 23 ++++ .../orgsys/FamilyHpoOrganSystemValidator.java | 25 ++++ .../PhenopacketHpoOrganSystemValidator.java | 21 ++++ 4 files changed, 186 insertions(+) create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java new file mode 100644 index 00000000..9acc8830 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java @@ -0,0 +1,117 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; + +import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.Term; +import org.monarchinitiative.phenol.ontology.data.TermId; +import 
org.phenopackets.phenopackettools.validator.core.ValidationResult; +import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; +import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Stream; + +abstract class AbstractOrganSystemValidator extends BaseHpoValidator { + + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractOrganSystemValidator.class); + + private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( + "HpoOrganSystemValidator", + "HPO organ system validator", + "Validate that each of the provided HPO organ systems is annotated by at least one observed phenotypic feature"); + + private static final String MISSING_ORGAN_SYSTEM_CATEGORY = "Missing organ system annotation"; + + protected final List organSystemTermIds; + + protected AbstractOrganSystemValidator(Ontology hpo, + Collection organSystemTermIds) { + super(hpo); + this.organSystemTermIds = Objects.requireNonNull(organSystemTermIds).stream() + .distinct() + .filter(organSystemTermIdIsInOntology(hpo)) + .sorted() + .toList(); + } + + private static Predicate organSystemTermIdIsInOntology(Ontology hpo) { + return organSystemTermId -> { + if (hpo.containsTerm(organSystemTermId)) { + return true; + } else { + LOGGER.warn("{} is not present in the ontology", organSystemTermId.getValue()); + return false; + } + }; + } + + @Override + public ValidatorInfo validatorInfo() { + return VALIDATOR_INFO; + } + + @Override + public List validate(T component) { + return getPhenopackets(component) + .flatMap(p -> checkPhenotypicFeatures(p.getSubject().getId(), p.getPhenotypicFeaturesList())) + .toList(); + } + + 
protected abstract Stream getPhenopackets(T component); + + private Stream checkPhenotypicFeatures(String individualId, List features) { + // Get a list of observed phenotypic feature term IDs. + List phenotypeFeatures = features.stream() + .filter(pf -> !pf.getExcluded()) // TODO - should we only work with the observed features? + .map(PhenotypicFeature::getType) + .map(toTermId(individualId)) + .flatMap(Optional::stream) + .toList(); + + + Stream.Builder results = Stream.builder(); + // Check we have at least one phenotypeFeature (pf) that is a descendant of given organSystemId + // and report otherwise. + organSystemLoop: + for (TermId organSystemId : organSystemTermIds) { + for (TermId pf : phenotypeFeatures) { + if (OntologyAlgorithm.existsPath(hpo, pf, organSystemId)) { + continue organSystemLoop; // It only takes one termId to annotate an organ system. + } + } + + // If we get here, then the organSystemId is not annotated, and we report a validation error. + Term organSystem = hpo.getTermMap().get(organSystemId); + ValidationResult result = ValidationResult.error(VALIDATOR_INFO, + MISSING_ORGAN_SYSTEM_CATEGORY, + "Missing annotation for %s [%s] in '%s'" + .formatted(organSystem.getName(), organSystem.id().getValue(), individualId)); + results.add(result); + } + + return results.build(); + } + + /** + * @return a function that maps an {@link OntologyClass} into a {@link TermId}, or logs a warning and yields an empty {@link Optional} if the ID cannot be parsed. 
+ */ + private static Function> toTermId(String individualId) { + return oc -> { + try { + return Optional.of(TermId.of(oc.getId())); + } catch (PhenolRuntimeException e) { + LOGGER.warn("Invalid term ID {} in individual {}", oc.getId(), individualId); + return Optional.empty(); + } + }; + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java new file mode 100644 index 00000000..a15d084a --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java @@ -0,0 +1,23 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.CohortOrBuilder; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.Collection; +import java.util.stream.Stream; + +public class CohortHpoOrganSystemValidator extends AbstractOrganSystemValidator { + + protected CohortHpoOrganSystemValidator(Ontology hpo, Collection organSystemTermIds) { + super(hpo, organSystemTermIds); + } + + @Override + protected Stream getPhenopackets(CohortOrBuilder component) { + return component.getMembersOrBuilderList().stream(); + } + + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java new file mode 100644 index 00000000..96b2d80c --- /dev/null +++ 
b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java @@ -0,0 +1,25 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.FamilyOrBuilder; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.Collection; +import java.util.stream.Stream; + +public class FamilyHpoOrganSystemValidator extends AbstractOrganSystemValidator { + + protected FamilyHpoOrganSystemValidator(Ontology hpo, Collection organSystemTermIds) { + super(hpo, organSystemTermIds); + } + + @Override + protected Stream getPhenopackets(FamilyOrBuilder component) { + return Stream.concat( + Stream.of(component.getProband()), + component.getRelativesList().stream() + ); + } + +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java new file mode 100644 index 00000000..cb79958d --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java @@ -0,0 +1,21 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; + +import java.util.List; +import java.util.stream.Stream; + +public class PhenopacketHpoOrganSystemValidator extends AbstractOrganSystemValidator { + + public PhenopacketHpoOrganSystemValidator(Ontology hpo, + List organSystemTerms) { + 
super(hpo, organSystemTerms); + } + + @Override + protected Stream getPhenopackets(PhenopacketOrBuilder component) { + return Stream.of(component); + } +} From 095f84ab804220fff83bb1961da04d1e62be0c42 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 08:49:37 -0400 Subject: [PATCH 29/79] Fixing typo in isoISO8601onset method --- .../builders/PhenotypicFeatureBuilder.java | 2 +- .../AtaxiaWithVitaminEdeficiency.java | 26 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java index dcd434ac..da10dbba 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java @@ -47,7 +47,7 @@ public PhenotypicFeatureBuilder onset(TimeElement time) { /** * @param isoISO8601 A string such as P10Y4M2D representing the age of onset/observation */ - public PhenotypicFeatureBuilder isoISO8601onset(String isoISO8601) { + public PhenotypicFeatureBuilder iso8601onset(String isoISO8601) { builder.setOnset(TimeElements.age(isoISO8601)); return this; } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java index 7ad3d7c5..c7909069 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java @@ -179,30 +179,30 @@ private Interpretation aved() { 
private List getPhenotypicFeatures() { String iso8601age = "P16Y"; var pf1 = PhenotypicFeatureBuilder.builder("HP:0002066","Gait ataxia") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf2 = PhenotypicFeatureBuilder.builder("HP:0001308","Tongue fasciculations") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf3 = PhenotypicFeatureBuilder.builder("HP:0002080","Intention tremor") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf4 = PhenotypicFeatureBuilder.builder("HP:0002075","Dysdiadochokinesis") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf5 = PhenotypicFeatureBuilder.builder("HP:0001251","Ataxia") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf6 = PhenotypicFeatureBuilder.builder("HP:0001284","Areflexia") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf7 = PhenotypicFeatureBuilder.builder("HP:0011448","Ankle clonus") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf8 = PhenotypicFeatureBuilder.builder("HP:0003690","Limb muscle weakness") - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf9 = PhenotypicFeatureBuilder.builder("HP:0003474","Somatic sensory dysfunction") .excluded() - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf10 = PhenotypicFeatureBuilder.builder("HP:0002599","Head titubation") .excluded() - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); var pf11 = PhenotypicFeatureBuilder.builder("HP:0031910","Abnormal cranial nerve physiology") .excluded() - .isoISO8601onset(iso8601age).build(); + .iso8601onset(iso8601age).build(); return List.of(pf1,pf2,pf3, pf4, pf5, pf6, pf7, pf8, pf9, pf10,pf11); } @@ -227,11 +227,11 @@ private MedicalAction vitaminEtreatment() { private List getMedicalHistory() { String iso8601age = 
"P10Y"; var gaitDisturbance = PhenotypicFeatureBuilder.builder("HP:0001288", "Gait disturbance") - .isoISO8601onset(iso8601age) + .iso8601onset(iso8601age) .build(); var weakness = PhenotypicFeatureBuilder.builder("HP:0001324", "Muscle weakness") .addModifier(generalized()) - .isoISO8601onset(iso8601age) + .iso8601onset(iso8601age) .build(); return List.of(gaitDisturbance, weakness); } From ad7247c91a4948602631c3a41ca32798581c614f Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 09:35:05 -0400 Subject: [PATCH 30/79] Adding Severity as a Constant --- constants/Severity.tsv | 6 ++ constants/rtd_texts.txt | 3 +- docs/constants.rst | 16 ++++++ .../builder/constants/Severity.java | 55 ++++--------------- 4 files changed, 35 insertions(+), 45 deletions(-) create mode 100644 constants/Severity.tsv diff --git a/constants/Severity.tsv b/constants/Severity.tsv new file mode 100644 index 00000000..56cf75ce --- /dev/null +++ b/constants/Severity.tsv @@ -0,0 +1,6 @@ +ontology.id ontology.label variable.name function.name +HP:0012827 Borderline BORDERLINE borderline +HP:0012825 Mild MILD mild +HP:0012826 Moderate MODERATE moderate +HP:0012828 Severe SEVERE severe +HP:0012829 Profound PROFOUND profound diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index 31572502..0aacfe0c 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -10,4 +10,5 @@ Unit|With some exceptions, terms from the `The Unified Code for Units of Measure Response|These codes from `NCI Thesaurus `_ can be used to code the overall response of a patient to treatment. Favorable and Unfavorble can be used for general purposes and the remaining codes are intended to be used for oncology. Assays|If possible, `LOINC `_ codes should be used to specify laboratory test assays. Gender|`LOINC `_ codes should be used to specify self-reported gender. 
-PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). \ No newline at end of file +PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). +Severity|Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. \ No newline at end of file diff --git a/docs/constants.rst b/docs/constants.rst index 919e8f5e..b8a7e53b 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -230,6 +230,22 @@ These codes from `NCI Thesaurus `_ ca "NCIT:C142360", "iRECIST Unconfirmed Progressive Disease", "iRECISTUnconfirmedProgressiveDisease()" +Severity +^^^^^^^^ + +Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "HP:0012827", "Borderline", "borderline()" + "HP:0012825", "Mild", "mild()" + "HP:0012826", "Moderate", "moderate()" + "HP:0012828", "Severe", "severe()" + "HP:0012829", "Profound", "profound()" + + SpatialPattern ^^^^^^^^^^^^^^ diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java index 9a0fb7e4..276120a5 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java @@ -1,54 +1,21 @@ package org.phenopackets.phenopackettools.builder.constants; +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; import org.phenopackets.schema.v2.core.OntologyClass; -import static 
org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; - - public class Severity { - private Severity() { - } - - private static final OntologyClass BORDERLINE = ontologyClass("HP:0012827", "Borderline"); - private static final OntologyClass MILD = ontologyClass("HP:0012825", "Mild"); - private static final OntologyClass MODERATE = ontologyClass("HP:0012826", "Moderate"); - private static final OntologyClass SEVERE = ontologyClass("HP:0012828", "Severe"); - private static final OntologyClass PROFOUND = ontologyClass("HP:0012829", "Profound"); - - /** - * Having a minor degree of severity that is considered to be on the boundary between the normal and the abnormal ranges. For quantitative traits, a deviation of that is less than two standard deviations from the appropriate population mean. - */ - public static OntologyClass borderline() { - return BORDERLINE; - } - - /** - * Having a relatively minor degree of severity. For quantitative traits, a deviation of between two and three standard deviations from the appropriate population mean. - */ - public static OntologyClass mild() { - return MILD; - } - - /** - * Having a medium degree of severity. For quantitative traits, a deviation of between three and four standard deviations from the appropriate population mean. - */ - public static OntologyClass moderate() { - return MODERATE; - } + private static final OntologyClass BORDERLINE = OntologyClassBuilder.ontologyClass("HP:0012827", "Borderline"); + private static final OntologyClass MILD = OntologyClassBuilder.ontologyClass("HP:0012825", "Mild"); + private static final OntologyClass MODERATE = OntologyClassBuilder.ontologyClass("HP:0012826", "Moderate"); + private static final OntologyClass SEVERE = OntologyClassBuilder.ontologyClass("HP:0012828", "Severe"); + private static final OntologyClass PROFOUND = OntologyClassBuilder.ontologyClass("HP:0012829", "Profound"); - /** - * Having a high degree of severity. 
For quantitative traits, a deviation of between four and five standard deviations from the appropriate population mean. - */ - public static OntologyClass severe() { - return SEVERE; - } - /** - * Having an extremely high degree of severity. For quantitative traits, a deviation of more than five standard deviations from the appropriate population mean. - */ - public static OntologyClass profound() { - return PROFOUND; - } + public static OntologyClass borderline() { return BORDERLINE; } + public static OntologyClass mild() { return MILD; } + public static OntologyClass moderate() { return MODERATE; } + public static OntologyClass severe() { return SEVERE; } + public static OntologyClass profound() { return PROFOUND; } } From dc509d56082299577562acbd2454c6b80b6cc49e Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 09:41:03 -0400 Subject: [PATCH 31/79] Update PhenotypicFeatureBuilder.java --- .../builder/builders/PhenotypicFeatureBuilder.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java index da10dbba..d1efb9ea 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/PhenotypicFeatureBuilder.java @@ -45,10 +45,10 @@ public PhenotypicFeatureBuilder onset(TimeElement time) { } /** - * @param isoISO8601 A string such as P10Y4M2D representing the age of onset/observation + * @param iso8601 A string such as P10Y4M2D representing the age of onset/observation */ - public PhenotypicFeatureBuilder iso8601onset(String isoISO8601) { - builder.setOnset(TimeElements.age(isoISO8601)); + public PhenotypicFeatureBuilder 
iso8601onset(String iso8601) { + builder.setOnset(TimeElements.age(iso8601)); return this; } From 51061f2f00b6d597a571c2b966bb1219ea3cf99d Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 09:57:04 -0400 Subject: [PATCH 32/79] Adding Evidence as a Constant --- constants/Evidence.csv | 6 ++++++ constants/rtd_texts.txt | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 constants/Evidence.csv diff --git a/constants/Evidence.csv b/constants/Evidence.csv new file mode 100644 index 00000000..d099e96f --- /dev/null +++ b/constants/Evidence.csv @@ -0,0 +1,6 @@ +ontology.id ontology.label variable.name function.name +ECO:0006016 author statement from published clinical study AUTHOR_STATEMENT_FROM_PCS authorStatementFromPublishedClinicalStudy +ECO:0007539 author statement from published clinical study used in automatic assertion AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC authorStatementFromPublishedClinicalStudyAutomaticAssertion +ECO:0006017author statement from published clinical study used in manual assertion AUTHOR_STATEMENT_FROM_PCS_MANUAL authorStatementFromPublishedClinicalStudyManualAssertion +ECO:0000033author statement supported by traceable reference AUTHOR_STATEMENT_TRACEABLE_REFERENCE authorStatementSupportedByTraceableReference +ECO:0006154self-reported patient statement evidence SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE selfReportedPatientStatementEvidence diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index 0aacfe0c..852d34f6 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -11,4 +11,5 @@ Response|These codes from `NCI Thesaurus `_ codes should be used to specify laboratory test assays. Gender|`LOINC `_ codes should be used to specify self-reported gender. PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). 
-Severity|Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. \ No newline at end of file +Severity|Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. +Evidence|Terms from the `Evidence and Conclusion Ontology ` are used to specify evidence categories. \ No newline at end of file From 7f363bff92781432336a86db1d41f1fc0282f333 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 2 Nov 2022 09:58:29 -0400 Subject: [PATCH 33/79] Build organ system validator API. Signed-off-by: Daniel Danis --- .../phenotype/HpoPhenotypeValidators.java | 68 +++++++++++++++++++ .../orgsys/AbstractOrganSystemValidator.java | 11 ++- .../orgsys/CohortHpoOrganSystemValidator.java | 2 +- .../orgsys/FamilyHpoOrganSystemValidator.java | 2 +- .../PhenopacketHpoOrganSystemValidator.java | 4 +- 5 files changed, 82 insertions(+), 5 deletions(-) diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java index 18271c6b..07c56347 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoPhenotypeValidators.java @@ -1,15 +1,21 @@ package org.phenopackets.phenopackettools.validator.core.phenotype; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; import org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.CohortHpoAncestryValidator; import 
org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.FamilyHpoAncestryValidator; import org.phenopackets.phenopackettools.validator.core.phenotype.ancestry.PhenopacketHpoAncestryValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.orgsys.CohortHpoOrganSystemValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.orgsys.FamilyHpoOrganSystemValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.orgsys.PhenopacketHpoOrganSystemValidator; import org.phenopackets.phenopackettools.validator.core.phenotype.primary.CohortHpoPhenotypeValidator; import org.phenopackets.phenopackettools.validator.core.phenotype.primary.FamilyHpoPhenotypeValidator; import org.phenopackets.phenopackettools.validator.core.phenotype.primary.PhenopacketHpoPhenotypeValidator; import org.phenopackets.schema.v2.*; +import java.util.Collection; + /** * Static factory class for getting {@link PhenopacketValidator}s for top-level Phenopacket schema components. */ @@ -136,4 +142,66 @@ public static PhenopacketValidator cohortHpoAncestryValidator(O } } + /** + * A static factory class for providing validators for checking annotation of organ systems. + *

+ * The validators check if each phenopacket or family/cohort member has an annotation + * for an organ system represented by a top-level HPO term + * (e.g. Abnormality of limbs). + * The annotation comprises either one or more observed descendants + * (e.g. Arachnodactyly), + * or an excluded top-level HPO term + * (NOT Abnormality of limbs). + *

+ */ + public static class OrganSystem { + private OrganSystem() { + } + + /** + * Get {@link PhenopacketValidator} to validate annotation of organ systems in a {@link Phenopacket} + * using provided {@link Ontology} and a collection of organ system {@link TermId}s. + *

+ * NOTE: the organ system {@link TermId} that is absent from the {@link Ontology} is disregarded + * and not used for validation. + * + * @param hpo HPO ontology + * @param organSystemTermIds a collection of HPO {@link TermId}s corresponding to organ systems. + */ + public static PhenopacketValidator phenopacketHpoOrganSystemValidator(Ontology hpo, + Collection organSystemTermIds) { + return new PhenopacketHpoOrganSystemValidator(hpo, organSystemTermIds); + } + + /** + * Get {@link PhenopacketValidator} to validate annotation of organ systems in a {@link Family} + * using provided {@link Ontology} and a collection of organ system {@link TermId}s. + *

+ * NOTE: the organ system {@link TermId} that is absent from the {@link Ontology} is disregarded + * and not used for validation. + * + * @param hpo HPO ontology + * @param organSystemTermIds a collection of HPO {@link TermId}s corresponding to organ systems. + */ + public static PhenopacketValidator familyHpoOrganSystemValidator(Ontology hpo, + Collection organSystemTermIds) { + return new FamilyHpoOrganSystemValidator(hpo, organSystemTermIds); + } + + /** + * Get {@link PhenopacketValidator} to validate annotation of organ systems in a {@link Cohort} + * using provided {@link Ontology} and a collection of organ system {@link TermId}s. + *

+ * NOTE: the organ system {@link TermId} that is absent from the {@link Ontology} is disregarded + * and not used for validation. + * + * @param hpo HPO ontology + * @param organSystemTermIds a collection of HPO {@link TermId}s corresponding to organ systems. + */ + public static PhenopacketValidator cohortHpoOrganSystemValidator(Ontology hpo, + Collection organSystemTermIds) { + return new CohortHpoOrganSystemValidator(hpo, organSystemTermIds); + } + } + } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java index 9acc8830..58970df1 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java @@ -20,7 +20,16 @@ import java.util.function.Predicate; import java.util.stream.Stream; -abstract class AbstractOrganSystemValidator extends BaseHpoValidator { +/** + * The base class for an organ system validator to check if each phenopacket or family/cohort member have annotation + * for an organ system represented by a top-level HPO term + * (e.g. Abnormality of limbs). + * The annotation comprises either one or more observed descendants + * (e.g. Arachnodactyly), + * or excluded top-level HPO term + * (NOT Abnormality of limbs). 
+ */ +public abstract class AbstractOrganSystemValidator extends BaseHpoValidator { private static final Logger LOGGER = LoggerFactory.getLogger(AbstractOrganSystemValidator.class); diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java index a15d084a..96bea28b 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/CohortHpoOrganSystemValidator.java @@ -10,7 +10,7 @@ public class CohortHpoOrganSystemValidator extends AbstractOrganSystemValidator { - protected CohortHpoOrganSystemValidator(Ontology hpo, Collection organSystemTermIds) { + public CohortHpoOrganSystemValidator(Ontology hpo, Collection organSystemTermIds) { super(hpo, organSystemTermIds); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java index 96b2d80c..7edf20df 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/FamilyHpoOrganSystemValidator.java @@ -10,7 +10,7 @@ public class FamilyHpoOrganSystemValidator extends AbstractOrganSystemValidator { - protected FamilyHpoOrganSystemValidator(Ontology hpo, Collection organSystemTermIds) { + public 
FamilyHpoOrganSystemValidator(Ontology hpo, Collection organSystemTermIds) { super(hpo, organSystemTermIds); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java index cb79958d..d4db9e81 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/PhenopacketHpoOrganSystemValidator.java @@ -4,13 +4,13 @@ import org.monarchinitiative.phenol.ontology.data.TermId; import org.phenopackets.schema.v2.PhenopacketOrBuilder; -import java.util.List; +import java.util.Collection; import java.util.stream.Stream; public class PhenopacketHpoOrganSystemValidator extends AbstractOrganSystemValidator { public PhenopacketHpoOrganSystemValidator(Ontology hpo, - List organSystemTerms) { + Collection organSystemTerms) { super(hpo, organSystemTerms); } From 0b69c0e6ceabf2f7637619f6d5b766167bc2960c Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 10:47:04 -0400 Subject: [PATCH 34/79] Adding Biospecimen as a Constant --- constants/Biospecimen.tsv | 10 ++++++++++ constants/Evidence.csv | 6 ------ constants/Evidence.tsv | 6 ++++++ constants/rtd_texts.txt | 3 ++- docs/constants.rst | 36 ++++++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 constants/Biospecimen.tsv delete mode 100644 constants/Evidence.csv create mode 100644 constants/Evidence.tsv diff --git a/constants/Biospecimen.tsv b/constants/Biospecimen.tsv new file mode 100644 index 00000000..ebe52d28 --- /dev/null +++ b/constants/Biospecimen.tsv @@ -0,0 +1,10 @@ +ontology.id 
ontology.label variable.name function.name +NCIT:C133261 Bone Marrow Aspirate BONE_MARROW_ASPIRATE boneMarrowAspirate +NCIT:C158416 Blood DNA BLOOD_DNA bloodDNA +NCIT:C185194 Cerebrospinal Fluid Sample CSF_SAMPLE cerebrospinalFluidSample +NCIT:C156435 Formalin-Fixed Paraffin-Embedded DNA FORMALIN_FIXED_PARAFIN_DNA formalinFixedParaffinEmbeddedDNA +NCIT:C13195 Bronchoalveolar Lavage Fluid BAL_FLUID bronchoalveolarLavageFluid +NCIT:C187062 Pericardial Fluid Specimen PERICARDIAL_FLUID_SAMPLE Pericardial Fluid Specimen +NCIT:C185197 Peritoneal Fluid Sample PERTONIAL_FLUID_SAMPLE Peritoneal Fluid Sample +NCIT:C163995 Total RNA TOTAL_RNA totalRNA +NCIT:C18009 Tumor Tissue TUMOR_TISSUE tumorTissue diff --git a/constants/Evidence.csv b/constants/Evidence.csv deleted file mode 100644 index d099e96f..00000000 --- a/constants/Evidence.csv +++ /dev/null @@ -1,6 +0,0 @@ -ontology.id ontology.label variable.name function.name -ECO:0006016 author statement from published clinical study AUTHOR_STATEMENT_FROM_PCS authorStatementFromPublishedClinicalStudy -ECO:0007539 author statement from published clinical study used in automatic assertion AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC authorStatementFromPublishedClinicalStudyAutomaticAssertion -ECO:0006017author statement from published clinical study used in manual assertion AUTHOR_STATEMENT_FROM_PCS_MANUAL authorStatementFromPublishedClinicalStudyManualAssertion -ECO:0000033author statement supported by traceable reference AUTHOR_STATEMENT_TRACEABLE_REFERENCE authorStatementSupportedByTraceableReference -ECO:0006154self-reported patient statement evidence SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE selfReportedPatientStatementEvidence diff --git a/constants/Evidence.tsv b/constants/Evidence.tsv new file mode 100644 index 00000000..9e00afc7 --- /dev/null +++ b/constants/Evidence.tsv @@ -0,0 +1,6 @@ +ontology.id ontology.label variable.name function.name +ECO:0006016 author statement from published clinical study AUTHOR_STATEMENT_FROM_PCS 
authorStatementFromPublishedClinicalStudy +ECO:0007539 author statement from published clinical study used in automatic assertion AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC authorStatementFromPublishedClinicalStudyAutomaticAssertion +ECO:0006017 author statement from published clinical study used in manual assertion AUTHOR_STATEMENT_FROM_PCS_MANUAL authorStatementFromPublishedClinicalStudyManualAssertion +ECO:0000033 author statement supported by traceable reference AUTHOR_STATEMENT_TRACEABLE_REFERENCE authorStatementSupportedByTraceableReference +ECO:0006154 self-reported patient statement evidence SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE selfReportedPatientStatementEvidence \ No newline at end of file diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index 852d34f6..c5db935d 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -12,4 +12,5 @@ Assays|If possible, `LOINC `_ codes should be used to specif Gender|`LOINC `_ codes should be used to specify self-reported gender. PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). Severity|Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. -Evidence|Terms from the `Evidence and Conclusion Ontology ` are used to specify evidence categories. \ No newline at end of file +Evidence|Terms from the `Evidence and Con clusion Ontology ` are used to specify evidence categories. +Biospecimen|Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. 
\ No newline at end of file diff --git a/docs/constants.rst b/docs/constants.rst index b8a7e53b..d51ace4b 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -51,6 +51,42 @@ If possible, `LOINC `_ codes should be used to specify labor "LOINC:2157-6", "Creatine kinase [Enzymatic activity/volume] in Serum or Plasma", "creatineKinaseActivity()" +Biospecimen +^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C133261", "Bone Marrow Aspirate", "boneMarrowAspirate()" + "NCIT:C158416", "Blood DNA", "bloodDNA()" + "NCIT:C185194", "Cerebrospinal Fluid Sample", "cerebrospinalFluidSample()" + "NCIT:C156435", "Formalin-Fixed Paraffin-Embedded DNA", "formalinFixedParaffinEmbeddedDNA()" + "NCIT:C13195", "Bronchoalveolar Lavage Fluid", "bronchoalveolarLavageFluid()" + "NCIT:C187062", "Pericardial Fluid Specimen", "Pericardial Fluid Specimen()" + "NCIT:C185197", "Peritoneal Fluid Sample", "Peritoneal Fluid Sample()" + "NCIT:C163995", "Total RNA", "totalRNA()" + "NCIT:C18009", "Tumor Tissue", "tumorTissue()" + + +Evidence +^^^^^^^^ + +Terms from the `Evidence and Conclusion Ontology `_ are used to specify evidence categories. + +..
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "ECO:0006016", "author statement from published clinical study", "authorStatementFromPublishedClinicalStudy()" + "ECO:0007539", "author statement from published clinical study used in automatic assertion", "authorStatementFromPublishedClinicalStudyAutomaticAssertion()" + "ECO:0006017", "author statement from published clinical study used in manual assertion", "authorStatementFromPublishedClinicalStudyManualAssertion()" + "ECO:0000033", "author statement supported by traceable reference", "authorStatementSupportedByTraceableReference()" + "ECO:0006154", "self-reported patient statement evidence", "selfReportedPatientStatementEvidence()" + + Gender ^^^^^^ From 9bd677affb070b47149b32685ca3d1200be3df4f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 2 Nov 2022 10:49:47 -0400 Subject: [PATCH 35/79] Test the organ system validator. Signed-off-by: Daniel Danis --- .../AbstractHpoAncestryValidator.java | 48 ++---- .../orgsys/AbstractOrganSystemValidator.java | 21 ++- .../phenotype/util/MaybeExcludedTermId.java | 20 +++ .../PhenotypicFeaturesByExclusionStatus.java | 9 + .../validator/core/phenotype/util/Util.java | 42 +++++ .../phenotype/AncestryHpoValidatorTest.java | 41 ++--- .../phenotype/OrganSystemValidatorTest.java | 159 ++++++++++++++++++ .../validator/core/phenotype/Utils.java | 33 ++++ 8 files changed, 295 insertions(+), 78 deletions(-) create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/MaybeExcludedTermId.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/PhenotypicFeaturesByExclusionStatus.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/Util.java create mode 100644
phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java create mode 100644 phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/Utils.java diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java index c5b9c887..bd41dc84 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java @@ -1,7 +1,6 @@ package org.phenopackets.phenopackettools.validator.core.phenotype.ancestry; import com.google.protobuf.MessageOrBuilder; -import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenol.ontology.data.Term; @@ -9,14 +8,12 @@ import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.util.PhenotypicFeaturesByExclusionStatus; +import org.phenopackets.phenopackettools.validator.core.phenotype.util.Util; import org.phenopackets.schema.v2.PhenopacketOrBuilder; import org.phenopackets.schema.v2.core.PhenotypicFeature; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.util.*; -import java.util.function.Function; -import 
java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -33,8 +30,6 @@ */ public abstract class AbstractHpoAncestryValidator extends BaseHpoValidator { - private static final Logger LOGGER = LoggerFactory.getLogger(AbstractHpoAncestryValidator.class); - private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( "HpoAncestryValidator", "HPO ancestry phenotypic feature validator", @@ -61,36 +56,28 @@ public List validate(T component) { protected abstract Stream extractPhenopackets(T message); private Stream validatePhenopacketPhenotypicFeatures(String id, List phenotypicFeatures) { - Map> featuresByExclusion = phenotypicFeatures.stream() - .map(toMaybeObservedTermId()) - .flatMap(Optional::stream) - // Use `partitioningBy` instead of `groupingBy` to ensure the map contains keys - // for both `true` and `false`. Then extract `TermId` and collect in a `Set`. - .collect(Collectors.partitioningBy(MaybeExcludedTermId::excluded, - Collectors.mapping(MaybeExcludedTermId::termId, Collectors.toSet()))); - + PhenotypicFeaturesByExclusionStatus featuresByExclusion = Util.partitionByExclusionStatus(phenotypicFeatures); Stream.Builder results = Stream.builder(); // Check that the component does not contain both observed term and its ancestor. 
- Set allObserved = featuresByExclusion.get(false); - Set allExcluded = featuresByExclusion.get(true); - for (TermId observed : allObserved) { + + for (TermId observed : featuresByExclusion.observedPhenotypicFeatures()) { for (TermId ancestor : OntologyAlgorithm.getAncestorTerms(hpo, observed, false)) { - if (allObserved.contains(ancestor)) + if (featuresByExclusion.observedPhenotypicFeatures().contains(ancestor)) results.add(constructResultForAnObservedTerm(id, observed, ancestor, false)); - if (allExcluded.contains(ancestor)) + if (featuresByExclusion.excludedPhenotypicFeatures().contains(ancestor)) results.add(constructResultForAnObservedTerm(id, observed, ancestor, true)); } } // Check that the component does not have negated descendant - for (TermId excluded : allExcluded) { + for (TermId excluded : featuresByExclusion.excludedPhenotypicFeatures()) { for (TermId child : OntologyAlgorithm.getDescendents(hpo, excluded)) { if (child.equals(excluded)) // skip the parent term continue; - if (allExcluded.contains(child)) + if (featuresByExclusion.excludedPhenotypicFeatures().contains(child)) results.add(constructResultForAnExcludedTerm(id, excluded, child)); } } @@ -98,19 +85,6 @@ private Stream validatePhenopacketPhenotypicFeatures(String id return results.build(); } - private static Function> toMaybeObservedTermId() { - return pf -> { - TermId termId; - try { - termId = TermId.of(pf.getType().getId()); - } catch (PhenolRuntimeException e) { - LOGGER.warn("Skipping ancestry validation of malformed term ID {}", pf.getType().getId()); - return Optional.empty(); - } - return Optional.of(new MaybeExcludedTermId(termId, pf.getExcluded())); - }; - } - private ValidationResult constructResultForAnObservedTerm(String id, TermId observedId, TermId ancestorId, boolean ancestorIsExcluded) { Term observedTerm = hpo.getTermMap().get(observedId); String observedTermName = observedTerm == null ? 
UNKNOWN : observedTerm.getName(); @@ -122,7 +96,7 @@ private ValidationResult constructResultForAnObservedTerm(String id, TermId obse id, observedTermName, observedId.getValue(), ancestorTermName, ancestorId.getValue()); else message = "Phenotypic features of %s must not contain both an observed term (%s, %s) and an observed ancestor (%s, %s)".formatted( - id, observedTermName, observedId.getValue(), ancestorTermName, ancestorId.getValue()); + id, observedTermName, observedId.getValue(), ancestorTermName, ancestorId.getValue()); return ValidationResult.error(VALIDATOR_INFO, APR_VIOLATION, message); } @@ -138,6 +112,4 @@ private ValidationResult constructResultForAnExcludedTerm(String id, TermId excl return ValidationResult.error(VALIDATOR_INFO, APR_VIOLATION, message); } - private record MaybeExcludedTermId(TermId termId, boolean excluded) { - } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java index 58970df1..255ba160 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java @@ -9,6 +9,8 @@ import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; +import org.phenopackets.phenopackettools.validator.core.phenotype.util.PhenotypicFeaturesByExclusionStatus; +import org.phenopackets.phenopackettools.validator.core.phenotype.util.Util; import org.phenopackets.schema.v2.PhenopacketOrBuilder; 
import org.phenopackets.schema.v2.core.OntologyClass; import org.phenopackets.schema.v2.core.PhenotypicFeature; @@ -36,7 +38,7 @@ public abstract class AbstractOrganSystemValidator e private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( "HpoOrganSystemValidator", "HPO organ system validator", - "Validate that HPO terms are well formatted, present, and non-obsolete based on the provided HPO"); + "Validate annotation of selected organ systems"); private static final String MISSING_ORGAN_SYSTEM_CATEGORY = "Missing organ system annotation"; @@ -78,27 +80,24 @@ public List validate(T component) { protected abstract Stream getPhenopackets(T component); private Stream checkPhenotypicFeatures(String individualId, List features) { - // Get a list of observed phenotypic feature term IDs. - List phenotypeFeatures = features.stream() - .filter(pf -> !pf.getExcluded()) // TODO - should we only work with the observed features? - .map(PhenotypicFeature::getType) - .map(toTermId(individualId)) - .flatMap(Optional::stream) - .toList(); - + PhenotypicFeaturesByExclusionStatus featuresByExclusion = Util.partitionByExclusionStatus(features); Stream.Builder results = Stream.builder(); // Check we have at least one phenotypeFeature (pf) that is a descendant of given organSystemId // and report otherwise. organSystemLoop: for (TermId organSystemId : organSystemTermIds) { - for (TermId pf : phenotypeFeatures) { + for (TermId pf : featuresByExclusion.observedPhenotypicFeatures()) { if (OntologyAlgorithm.existsPath(hpo, pf, organSystemId)) { continue organSystemLoop; // It only takes one termId to annotate an organ system. } } - // If we get here, then the organSystemId is not annotated, and we report a validation error. + // Check if the organ system abnormality has been specifically excluded. + if (featuresByExclusion.excludedPhenotypicFeatures().contains(organSystemId)) + continue; // Yes, it was. 
Let's check the next organ system + + // The organSystemId is neither annotated nor excluded. We report a validation error. Term organSystem = hpo.getTermMap().get(organSystemId); ValidationResult result = ValidationResult.error(VALIDATOR_INFO, MISSING_ORGAN_SYSTEM_CATEGORY, diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/MaybeExcludedTermId.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/MaybeExcludedTermId.java new file mode 100644 index 00000000..9b4eb0d4 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/MaybeExcludedTermId.java @@ -0,0 +1,20 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.util; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.Optional; + +record MaybeExcludedTermId(TermId termId, boolean excluded) { + + static Optional fromPhenotypicFeature(PhenotypicFeature phenotypicFeature) { + TermId termId; + try { + termId = TermId.of(phenotypicFeature.getType().getId()); + } catch (PhenolRuntimeException e) { + return Optional.empty(); + } + return Optional.of(new MaybeExcludedTermId(termId, phenotypicFeature.getExcluded())); + } +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/PhenotypicFeaturesByExclusionStatus.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/PhenotypicFeaturesByExclusionStatus.java new file mode 100644 index 00000000..a5abb10e --- /dev/null +++ 
b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/PhenotypicFeaturesByExclusionStatus.java @@ -0,0 +1,9 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.util; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.util.Set; + +public record PhenotypicFeaturesByExclusionStatus(Set observedPhenotypicFeatures, + Set excludedPhenotypicFeatures) { +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/Util.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/Util.java new file mode 100644 index 00000000..ffcf668b --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/util/Util.java @@ -0,0 +1,42 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype.util; + +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +public class Util { + + private static final Logger LOGGER = LoggerFactory.getLogger(Util.class); + + private Util() { + // static utility class + } + + public static PhenotypicFeaturesByExclusionStatus partitionByExclusionStatus(Collection phenotypicFeatures) { + Map> byExclusion = phenotypicFeatures.stream() + .map(toMaybeObservedTermId()) + .flatMap(Optional::stream) + // Use `partitioningBy` instead of `groupingBy` to ensure the map contains keys + // for both `true` and `false`. Then extract `TermId` and collect in a `Set`. 
+ .collect(Collectors.partitioningBy(MaybeExcludedTermId::excluded, + Collectors.mapping(MaybeExcludedTermId::termId, Collectors.toSet()))); + return new PhenotypicFeaturesByExclusionStatus(byExclusion.get(false), byExclusion.get(true)); + } + + private static Function> toMaybeObservedTermId() { + return pf -> MaybeExcludedTermId.fromPhenotypicFeature(pf) + .or(() -> { + // Let's log the malformed term. + LOGGER.warn("Skipping validation of malformed term ID {}", pf.getType().getId()); + return Optional.empty(); + }); + } +} diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java index 8569bb1e..aa5467d0 100644 --- a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/AncestryHpoValidatorTest.java @@ -9,12 +9,11 @@ import org.phenopackets.phenopackettools.validator.core.ValidationLevel; import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.schema.v2.*; -import org.phenopackets.schema.v2.core.OntologyClass; import org.phenopackets.schema.v2.core.PhenotypicFeature; -import java.util.Arrays; import java.util.List; +import static org.phenopackets.phenopackettools.validator.core.phenotype.Utils.*; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -36,7 +35,8 @@ public void setUp() { public void testValidInput() { // Has some Abnormality of finger but no Arachnodactyly. 
Phenopacket pp = createPhenopacket( - "example-phenopacket", createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + "example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), createPhenotypicFeature("HP:0001166", "Arachnodactyly", true) ).build(); @@ -49,7 +49,7 @@ public void testValidInput() { public void testFailsIfTermAndAncestorIsObserved() { // Has some Abnormality of finger and Arachnodactyly. Only Arachnodactyly should be present. Phenopacket pp = createPhenopacket( - "example-phenopacket", createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), + "example-phenopacket", "example-subject", createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) ).build(); @@ -67,7 +67,7 @@ public void testFailsIfTermAndAncestorIsObserved() { public void testFailsIfTermAndAncestorIsExcluded() { // Has neither Abnormality of finger nor Arachnodactyly. Only Abnormality of finger should be present. Phenopacket pp = createPhenopacket( - "example-phenopacket", createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), + "example-phenopacket", "example-subject", createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), createPhenotypicFeature("HP:0001166", "Arachnodactyly", true) ).build(); @@ -84,7 +84,7 @@ public void testFailsIfTermAndAncestorIsExcluded() { public void testFailsIfTermIsPresentAndAncestorIsExcluded() { // Has neither Abnormality of finger nor Arachnodactyly. Only Abnormality of finger should be present. 
Phenopacket pp = createPhenopacket( - "example-phenopacket", createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), + "example-phenopacket", "example-subject", createPhenotypicFeature("HP:0001167", "Abnormality of finger", true), createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) ).build(); @@ -116,15 +116,15 @@ public void setUp() { @Test public void testValidInput() { Family family = Family.newBuilder() - .setProband(createPhenopacket("example-phenopacket", + .setProband(createPhenopacket("example-phenopacket", "example-subject", createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) .build()) - .addRelatives(createPhenopacket("dad-phenopacket", + .addRelatives(createPhenopacket("dad-phenopacket", "example-dad", createPhenotypicFeature("HP:0001238", "Slender finger", false), createPhenotypicFeature("HP:0100807", "Long fingers", false)) .build()) - .addRelatives(createPhenopacket("mom-phenopacket", + .addRelatives(createPhenopacket("mom-phenopacket", "example-mom", createPhenotypicFeature("HP:0001238", "Slender finger", false), createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) .build()) @@ -154,15 +154,15 @@ public void setUp() { @Test public void testValidInput() { Cohort cohort = Cohort.newBuilder() - .addMembers(createPhenopacket("joe-phenopacket", + .addMembers(createPhenopacket("joe-phenopacket", "example-subject", createPhenotypicFeature("HP:0001167", "Abnormality of finger", false), createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) .build()) - .addMembers(createPhenopacket("jim-phenopacket", + .addMembers(createPhenopacket("jim-phenopacket", "example-jim", createPhenotypicFeature("HP:0001238", "Slender finger", false), createPhenotypicFeature("HP:0100807", "Long fingers", false)) .build()) - .addMembers(createPhenopacket("jane-phenopacket", + .addMembers(createPhenopacket("jane-phenopacket", "example-jane", 
createPhenotypicFeature("HP:0001238", "Slender finger", false), createPhenotypicFeature("HP:0001166", "Arachnodactyly", true)) .build()) @@ -174,21 +174,4 @@ public void testValidInput() { } } - private static Phenopacket.Builder createPhenopacket(String phenopacketId, - PhenotypicFeature... features) { - return Phenopacket.newBuilder() - .setId(phenopacketId) - .addAllPhenotypicFeatures(Arrays.asList(features)); - } - - private static PhenotypicFeature createPhenotypicFeature(String id, String label, boolean excluded) { - return PhenotypicFeature.newBuilder() - .setType(OntologyClass.newBuilder() - .setId(id) - .setLabel(label) - .build()) - .setExcluded(excluded) - .build(); - } - } diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java new file mode 100644 index 00000000..5c3a3a2e --- /dev/null +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java @@ -0,0 +1,159 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.validator.core.*; +import org.phenopackets.schema.v2.*; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.List; +import java.util.Set; + +import static org.phenopackets.phenopackettools.validator.core.phenotype.Utils.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + 
+public class OrganSystemValidatorTest { + + private static final Ontology HPO = TestData.HPO; + private static final Set ABNORMALITY_OF_LIMBS_ORGAN_SYSTEM = Set.of(TermId.of("HP:0040064")); + // Not a real organ system, but for the sake of testing... + private static final Set SLENDER_FINGER_ORGAN_SYSTEM = Set.of(TermId.of("HP:0001238")); + + @Nested + public class PhenopacketTest { + + private PhenopacketValidator abnormalityOfLimbValidator; + private PhenopacketValidator slenderFingerValidator; + + @BeforeEach + public void setUp() { + abnormalityOfLimbValidator = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(HPO, ABNORMALITY_OF_LIMBS_ORGAN_SYSTEM); + slenderFingerValidator = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(HPO, SLENDER_FINGER_ORGAN_SYSTEM); + } + + @Test + public void noValidationErrorsIfOrganSystemIsAnnotated() { + // Has Arachnodactyly. + Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false) + ).build(); + + List results = abnormalityOfLimbValidator.validate(pp); + + assertThat(results, is(empty())); + } + + @Test + public void noValidationErrorsIfOrganSystemAbnormalityIsExcluded() { + // Has Arachnodactyly. + Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0040064", "Abnormality of limbs", true) + ).build(); + + List results = abnormalityOfLimbValidator.validate(pp); + + assertThat(results, is(empty())); + } + + @ParameterizedTest + @CsvSource({ + "true", + "false" + }) + public void annotationAbsenceLeadsToAnError(boolean excluded) { + // Long fingers and Slender finger are siblings, hence no annotation here. 
+ Phenopacket pp = createPhenopacket( + "example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0100807", "Long fingers", excluded) + ).build(); + + List results = slenderFingerValidator.validate(pp); + + assertThat(results, hasSize(1)); + ValidationResult result = results.get(0); + assertThat(result.validatorInfo(), equalTo(slenderFingerValidator.validatorInfo())); + assertThat(result.level(), equalTo(ValidationLevel.ERROR)); + assertThat(result.category(), equalTo("Missing organ system annotation")); + assertThat(result.message(), equalTo("Missing annotation for Slender finger [HP:0001238] in 'example-subject'")); + } + } + + /** + * White-box testing - we know that the {@link PhenotypicFeature} is an attribute of a {@link Phenopacket}, so we + * test the validation logic extensively in {@link OrganSystemValidatorTest.PhenopacketTest}. + * The {@link OrganSystemValidatorTest.FamilyTest} test suite ensures there are not errors in a valid input. + */ + @Nested + public class FamilyTest { + + private PhenopacketValidator abnormalityOfLimbValidator; + + @BeforeEach + public void setUp() { + abnormalityOfLimbValidator = HpoPhenotypeValidators.OrganSystem.familyHpoOrganSystemValidator(HPO, ABNORMALITY_OF_LIMBS_ORGAN_SYSTEM); + } + + @Test + public void testValidInput() { + Family family = Family.newBuilder() + .setProband(createPhenopacket("example-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false)) + .build()) + .addRelatives(createPhenopacket("dad-phenopacket", "example-dad", + createPhenotypicFeature("HP:0001238", "Slender finger", false)) + .build()) + .addRelatives(createPhenopacket("mom-phenopacket", "other-mom", + createPhenotypicFeature("HP:0100807", "Long fingers", false)) + .build()) + .build(); + + List results = abnormalityOfLimbValidator.validate(family); + + assertThat(results, is(empty())); + } + } + + /** + * White-box testing (same as in {@link OrganSystemValidatorTest.FamilyTest}) - we 
know that the {@link PhenotypicFeature} + * is an attribute of a {@link Phenopacket}, so we test the validation logic extensively + * in {@link OrganSystemValidatorTest.PhenopacketTest}. + * The {@link OrganSystemValidatorTest.CohortTest} test suite ensures there are not errors in valid input. + */ + @Nested + public class CohortTest { + + private PhenopacketValidator abnormalityOfLimbValidator; + + @BeforeEach + public void setUp() { + abnormalityOfLimbValidator = HpoPhenotypeValidators.OrganSystem.cohortHpoOrganSystemValidator(HPO, ABNORMALITY_OF_LIMBS_ORGAN_SYSTEM); + } + + @Test + public void testValidInput() { + Cohort cohort = Cohort.newBuilder() + .addMembers(createPhenopacket("joe-phenopacket", "example-subject", + createPhenotypicFeature("HP:0001166", "Arachnodactyly", false)) + .build()) + .addMembers(createPhenopacket("jim-phenopacket", "example-jim", + createPhenotypicFeature("HP:0001238", "Slender finger", false)) + .build()) + .addMembers(createPhenopacket("jane-phenopacket", "example-jane", + createPhenotypicFeature("HP:0100807", "Long fingers", false)) + .build()) + .build(); + + List results = abnormalityOfLimbValidator.validate(cohort); + + assertThat(results, is(empty())); + } + } +} diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/Utils.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/Utils.java new file mode 100644 index 00000000..7b93bc68 --- /dev/null +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/Utils.java @@ -0,0 +1,33 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype; + +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.Individual; +import org.phenopackets.schema.v2.core.OntologyClass; +import org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.Arrays; + +public class 
Utils { + + static Phenopacket.Builder createPhenopacket(String phenopacketId, + String subjectId, + PhenotypicFeature... features) { + return Phenopacket.newBuilder() + .setId(phenopacketId) + .setSubject(Individual.newBuilder() + .setId(subjectId) + .build()) + .addAllPhenotypicFeatures(Arrays.asList(features)); + } + + static PhenotypicFeature createPhenotypicFeature(String id, String label, boolean excluded) { + return PhenotypicFeature.newBuilder() + .setType(OntologyClass.newBuilder() + .setId(id) + .setLabel(label) + .build()) + .setExcluded(excluded) + .build(); + } + +} From 63dd572108e1a0533c1305720e5e195c2df2cab8 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 10:55:44 -0400 Subject: [PATCH 36/79] Adding TumorProgression as a Constant --- constants/TumorProgression.tsv | 4 ++++ constants/rtd_texts.txt | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 constants/TumorProgression.tsv diff --git a/constants/TumorProgression.tsv b/constants/TumorProgression.tsv new file mode 100644 index 00000000..613fa925 --- /dev/null +++ b/constants/TumorProgression.tsv @@ -0,0 +1,4 @@ +ontology.id ontology.label variable.name function.name +NCIT:C8509 Primary Neoplasm PRIMARY_NEOPLASM primaryNeoplasm +NCIT:C3261 Metastatic Neoplasm METASTATIC_NEOPLASM metastaticNeoplasm +NCIT:C4798 Recurrent Neoplasm RECURRENT_NEOPLASM recurrentNeoplasm diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index c5db935d..670f0cf6 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -13,4 +13,5 @@ Gender|`LOINC `_ codes should be used to specify self-report PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). Severity|Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. 
Evidence|Terms from the `Evidence and Con clusion Ontology ` are used to specify evidence categories. -Biospecimen|Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. \ No newline at end of file +Biospecimen|Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. +TumorProgression|Terms from the `NCI Thesaurus `_ are used to indicate if a specimen is from the primary tumor, a metastasis or a recurrence. \ No newline at end of file From 04c74b9fbfa2bcaa1484378d9baf261db169bc96 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 11:33:44 -0400 Subject: [PATCH 37/79] Adding DiseaseGrade as a Constant --- .../{Biospecimen.tsv => BiospecimenType.tsv} | 0 constants/DiseaseGrade.tsv | 7 ++++ constants/rtd_texts.txt | 6 ++-- docs/constants.rst | 35 +++++++++++++++++-- 4 files changed, 44 insertions(+), 4 deletions(-) rename constants/{Biospecimen.tsv => BiospecimenType.tsv} (100%) create mode 100644 constants/DiseaseGrade.tsv diff --git a/constants/Biospecimen.tsv b/constants/BiospecimenType.tsv similarity index 100% rename from constants/Biospecimen.tsv rename to constants/BiospecimenType.tsv diff --git a/constants/DiseaseGrade.tsv b/constants/DiseaseGrade.tsv new file mode 100644 index 00000000..aaa2c73d --- /dev/null +++ b/constants/DiseaseGrade.tsv @@ -0,0 +1,7 @@ +ontology.id ontology.label variable.name function.name +NCIT:C28077 Grade 1 GRADE_1 grade1 +NCIT:C28078 Grade 2 GRADE_2 grade2 +NCIT:C28079 Grade 3 GRADE_3 grade3 +NCIT:C28080 Grade 3a GRADE_3A grade3a +NCIT:C28081 Grade 3b GRADE_3B grade3b +NCIT:C28082 Grade 4 GRADE_4 grade4 diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index 670f0cf6..9e69eeb4 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -13,5 +13,7 @@ Gender|`LOINC `_ codes should be used to specify self-report PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy 
specimens). Severity|Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. Evidence|Terms from the `Evidence and Con clusion Ontology ` are used to specify evidence categories. -Biospecimen|Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. -TumorProgression|Terms from the `NCI Thesaurus `_ are used to indicate if a specimen is from the primary tumor, a metastasis or a recurrence. \ No newline at end of file +BiospecimenType|Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. +TumorProgression|Terms from the `NCI Thesaurus `_ are used to indicate if a specimen is from the primary tumor, a metastasis or a recurrence. +TumorGrade|Terms from the `NCI Thesaurus `_ to describe microscopic appearance of tumor. Grade 1: Well differentiated (low grade); Grade 2: Moderately differentiated (intermediate grade); Grade 3: Poorly differentiated (high grade); Grade 4: Undifferentiated (high grade). +DiseaseGrade|Terms from the `NCI Thesaurus `_ to represent the tumor grade. \ No newline at end of file diff --git a/docs/constants.rst b/docs/constants.rst index d51ace4b..5f68271e 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -51,8 +51,8 @@ If possible, `LOINC `_ codes should be used to specify labor "LOINC:2157-6", "Creatine kinase [Enzymatic activity/volume] in Serum or Plasma", "creatineKinaseActivity()" -Biospecimen -^^^^^^^^^^^ +BiospecimenType +^^^^^^^^^^^^^^^ Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. @@ -71,6 +71,23 @@ Terms from the `NCI Thesaurus `_ are "NCIT:C18009", "Tumor Tissue", "tumorTissue()" +DiseaseGrade +^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ to represent the tumor grade. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C28077", "Grade 1", "grade1()" + "NCIT:C28078", "Grade 2", "grade2()" + "NCIT:C28079", "Grade 3", "grade3()" + "NCIT:C28080", "Grade 3a", "grade3a()" + "NCIT:C28081", "Grade 3b", "grade3b()" + "NCIT:C28082", "Grade 4", "grade4()" + + Evidence ^^^^^^^^ @@ -326,6 +343,20 @@ Modifier terms from the `HPO `_ are used to describe s "HP:0032540", "Joint flexor surface localization", "jointFlexorSurfaceLocalization()" +TumorProgression +^^^^^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ are used to indicate if a specimen is from the primary tumor, a metastasis or a recurrence. + +.. csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C8509", "Primary Neoplasm", "primaryNeoplasm()" + "NCIT:C3261", "Metastatic Neoplasm", "metastaticNeoplasm()" + "NCIT:C4798", "Recurrent Neoplasm", "recurrentNeoplasm()" + + Unit ^^^^ From dff47ce4d75ede1732574df88b1741781f57fdd6 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 11:49:03 -0400 Subject: [PATCH 38/79] Adding MaterialSample as a Constant --- constants/rtd_texts.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index 9e69eeb4..dd18506d 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -16,4 +16,5 @@ Evidence|Terms from the `Evidence and Con clusion Ontology `_ are used to denote the source of a biospecimen. TumorProgression|Terms from the `NCI Thesaurus `_ are used to indicate if a specimen is from the primary tumor, a metastasis or a recurrence. TumorGrade|Terms from the `NCI Thesaurus `_ to describe microscopic appearance of tumor. Grade 1: Well differentiated (low grade); Grade 2: Moderately differentiated (intermediate grade); Grade 3: Poorly differentiated (high grade); Grade 4: Undifferentiated (high grade). -DiseaseGrade|Terms from the `NCI Thesaurus `_ to represent the tumor grade. 
\ No newline at end of file +DiseaseGrade|Terms from the `NCI Thesaurus `_ to represent the tumor grade. +MaterialSample|Terms from the `EFO `_ to specify the status of the sample. \ No newline at end of file From e161fdc3ec350ba6358695ee8d7613f62b57cc4a Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 11:56:42 -0400 Subject: [PATCH 39/79] Adding MaterialSample as a Constant --- constants/BiospecimenType.tsv | 4 +-- constants/Evidence.tsv | 6 ++-- constants/MaterialSample.tsv | 3 ++ docs/constants.rst | 23 +++++++++++---- .../builder/constants/BiospecimenType.java | 29 +++++++++++++++++++ .../builder/constants/DiseaseGrade.java | 23 +++++++++++++++ .../builder/constants/Evidence.java | 21 ++++++++++++++ .../builder/constants/MaterialSample.java | 15 ++++++++++ .../builder/constants/TumorProgression.java | 17 +++++++++++ 9 files changed, 131 insertions(+), 10 deletions(-) create mode 100644 constants/MaterialSample.tsv create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java diff --git a/constants/BiospecimenType.tsv b/constants/BiospecimenType.tsv index ebe52d28..43ff2c58 100644 --- a/constants/BiospecimenType.tsv +++ b/constants/BiospecimenType.tsv @@ -4,7 +4,7 @@ NCIT:C158416 Blood DNA BLOOD_DNA bloodDNA NCIT:C185194 Cerebrospinal Fluid Sample CSF_SAMPLE cerebrospinalFluidSample NCIT:C156435 Formalin-Fixed Paraffin-Embedded DNA FORMALIN_FIXED_PARAFIN_DNA 
formalinFixedParaffinEmbeddedDNA NCIT:C13195 Bronchoalveolar Lavage Fluid BAL_FLUID bronchoalveolarLavageFluid -NCIT:C187062 Pericardial Fluid Specimen PERICARDIAL_FLUID_SAMPLE Pericardial Fluid Specimen -NCIT:C185197 Peritoneal Fluid Sample PERTONIAL_FLUID_SAMPLE Peritoneal Fluid Sample +NCIT:C187062 Pericardial Fluid Specimen PERICARDIAL_FLUID_SAMPLE pericardialFluidSpecimen +NCIT:C185197 Peritoneal Fluid Sample PERTONIAL_FLUID_SAMPLE peritonealFluidSample NCIT:C163995 Total RNA TOTAL_RNA totalRNA NCIT:C18009 Tumor Tissue TUMOR_TISSUE tumorTissue diff --git a/constants/Evidence.tsv b/constants/Evidence.tsv index 9e00afc7..d2bb6db6 100644 --- a/constants/Evidence.tsv +++ b/constants/Evidence.tsv @@ -1,6 +1,6 @@ ontology.id ontology.label variable.name function.name ECO:0006016 author statement from published clinical study AUTHOR_STATEMENT_FROM_PCS authorStatementFromPublishedClinicalStudy ECO:0007539 author statement from published clinical study used in automatic assertion AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC authorStatementFromPublishedClinicalStudyAutomaticAssertion -ECO:0006017 author statement from published clinical study used in manual assertion AUTHOR_STATEMENT_FROM_PCS_MANUAL authorStatementFromPublishedClinicalStudyManualAssertion -ECO:0000033 author statement supported by traceable reference AUTHOR_STATEMENT_TRACEABLE_REFERENCE authorStatementSupportedByTraceableReference -ECO:0006154 self-reported patient statement evidence SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE selfReportedPatientStatementEvidence \ No newline at end of file +ECO:0006017 author statement from published clinical study used in manual assertion AUTHOR_STATEMENT_FROM_PCS_MANUAL authorStatementFromPublishedClinicalStudyManualAssertion +ECO:0000033 author statement supported by traceable reference AUTHOR_STATEMENT_TRACEABLE_REFERENCE authorStatementSupportedByTraceableReference +ECO:0006154 self-reported patient statement evidence SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE 
selfReportedPatientStatementEvidence \ No newline at end of file diff --git a/constants/MaterialSample.tsv b/constants/MaterialSample.tsv new file mode 100644 index 00000000..69b01290 --- /dev/null +++ b/constants/MaterialSample.tsv @@ -0,0 +1,3 @@ +ontology.id ontology.label variable.name function.name +EFO:0009655 abnormal sample ABNORMAL_SAMPLE abnormalSample +EFO:0009654 reference sample REFERENCE_SAMPLE referenceSample diff --git a/docs/constants.rst b/docs/constants.rst index 5f68271e..310d1010 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -65,8 +65,8 @@ Terms from the `NCI Thesaurus `_ are "NCIT:C185194", "Cerebrospinal Fluid Sample", "cerebrospinalFluidSample()" "NCIT:C156435", "Formalin-Fixed Paraffin-Embedded DNA", "formalinFixedParaffinEmbeddedDNA()" "NCIT:C13195", "Bronchoalveolar Lavage Fluid", "bronchoalveolarLavageFluid()" - "NCIT:C187062", "Pericardial Fluid Specimen", "Pericardial Fluid Specimen()" - "NCIT:C185197", "Peritoneal Fluid Sample", "Peritoneal Fluid Sample()" + "NCIT:C187062", "Pericardial Fluid Specimen", "pericardialFluidSpecimen()" + "NCIT:C185197", "Peritoneal Fluid Sample", "peritonealFluidSample()" "NCIT:C163995", "Total RNA", "totalRNA()" "NCIT:C18009", "Tumor Tissue", "tumorTissue()" @@ -99,9 +99,9 @@ Terms from the `Evidence and Con clusion Ontology `_ are used to describe l "HP:0012832", "Bilateral", "bilateral()" +MaterialSample +^^^^^^^^^^^^^^ + +Terms from the `EFO `_ to specify the status of the sample. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "EFO:0009655", "abnormal sample", "abnormalSample()" + "EFO:0009654", "reference sample", "referenceSample()" + + MedicalActions ^^^^^^^^^^^^^^ diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java new file mode 100644 index 00000000..dfcdf261 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java @@ -0,0 +1,29 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class BiospecimenType { + + private static final OntologyClass BONE_MARROW_ASPIRATE = OntologyClassBuilder.ontologyClass("NCIT:C133261", "Bone Marrow Aspirate"); + private static final OntologyClass BLOOD_DNA = OntologyClassBuilder.ontologyClass("NCIT:C158416", "Blood DNA"); + private static final OntologyClass CSF_SAMPLE = OntologyClassBuilder.ontologyClass("NCIT:C185194", "Cerebrospinal Fluid Sample"); + private static final OntologyClass FORMALIN_FIXED_PARAFIN_DNA = OntologyClassBuilder.ontologyClass("NCIT:C156435", "Formalin-Fixed Paraffin-Embedded DNA"); + private static final OntologyClass BAL_FLUID = OntologyClassBuilder.ontologyClass("NCIT:C13195", "Bronchoalveolar Lavage Fluid"); + private static final OntologyClass PERICARDIAL_FLUID_SAMPLE = OntologyClassBuilder.ontologyClass("NCIT:C187062", "Pericardial Fluid Specimen"); + private static final OntologyClass PERTONIAL_FLUID_SAMPLE = OntologyClassBuilder.ontologyClass("NCIT:C185197", "Peritoneal Fluid Sample"); + private static final OntologyClass TOTAL_RNA = OntologyClassBuilder.ontologyClass("NCIT:C163995", "Total RNA"); + private static final 
OntologyClass TUMOR_TISSUE = OntologyClassBuilder.ontologyClass("NCIT:C18009", "Tumor Tissue"); + + + public static OntologyClass boneMarrowAspirate() { return BONE_MARROW_ASPIRATE; } + public static OntologyClass bloodDNA() { return BLOOD_DNA; } + public static OntologyClass cerebrospinalFluidSample() { return CSF_SAMPLE; } + public static OntologyClass formalinFixedParaffinEmbeddedDNA() { return FORMALIN_FIXED_PARAFIN_DNA; } + public static OntologyClass bronchoalveolarLavageFluid() { return BAL_FLUID; } + public static OntologyClass pericardialFluidSpecimen() { return PERICARDIAL_FLUID_SAMPLE; } + public static OntologyClass peritonealFluidSample() { return PERTONIAL_FLUID_SAMPLE; } + public static OntologyClass totalRNA() { return TOTAL_RNA; } + public static OntologyClass tumorTissue() { return TUMOR_TISSUE; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java new file mode 100644 index 00000000..d5baa9d5 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java @@ -0,0 +1,23 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class DiseaseGrade { + + private static final OntologyClass GRADE_1 = OntologyClassBuilder.ontologyClass("NCIT:C28077", "Grade 1"); + private static final OntologyClass GRADE_2 = OntologyClassBuilder.ontologyClass("NCIT:C28078", "Grade 2"); + private static final OntologyClass GRADE_3 = OntologyClassBuilder.ontologyClass("NCIT:C28079", "Grade 3"); + private static final OntologyClass GRADE_3A = OntologyClassBuilder.ontologyClass("NCIT:C28080", "Grade 3a"); + private static final OntologyClass GRADE_3B = 
OntologyClassBuilder.ontologyClass("NCIT:C28081", "Grade 3b"); + private static final OntologyClass GRADE_4 = OntologyClassBuilder.ontologyClass("NCIT:C28082", "Grade 4"); + + + public static OntologyClass grade1() { return GRADE_1; } + public static OntologyClass grade2() { return GRADE_2; } + public static OntologyClass grade3() { return GRADE_3; } + public static OntologyClass grade3a() { return GRADE_3A; } + public static OntologyClass grade3b() { return GRADE_3B; } + public static OntologyClass grade4() { return GRADE_4; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java new file mode 100644 index 00000000..172e2420 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java @@ -0,0 +1,21 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class Evidence { + + private static final OntologyClass AUTHOR_STATEMENT_FROM_PCS = OntologyClassBuilder.ontologyClass("ECO:0006016", "author statement from published clinical study"); + private static final OntologyClass AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC = OntologyClassBuilder.ontologyClass("ECO:0007539", "author statement from published clinical study used in automatic assertion"); + private static final OntologyClass AUTHOR_STATEMENT_FROM_PCS_MANUAL = OntologyClassBuilder.ontologyClass("ECO:0006017", "author statement from published clinical study used in manual assertion"); + private static final OntologyClass AUTHOR_STATEMENT_TRACEABLE_REFERENCE = OntologyClassBuilder.ontologyClass("ECO:0000033", "author statement supported by traceable reference"); + private static final OntologyClass 
SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE = OntologyClassBuilder.ontologyClass("ECO:0006154", "self-reported patient statement evidence"); + + + public static OntologyClass authorStatementFromPublishedClinicalStudy() { return AUTHOR_STATEMENT_FROM_PCS; } + public static OntologyClass authorStatementFromPublishedClinicalStudyAutomaticAssertion() { return AUTHOR_STATEMENT_FROM_PCS_AUTOMATIC; } + public static OntologyClass authorStatementFromPublishedClinicalStudyManualAssertion() { return AUTHOR_STATEMENT_FROM_PCS_MANUAL; } + public static OntologyClass authorStatementSupportedByTraceableReference() { return AUTHOR_STATEMENT_TRACEABLE_REFERENCE; } + public static OntologyClass selfReportedPatientStatementEvidence() { return SELF_REPORTED_PATIENT_STATEMENT_EVIDENCE; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java new file mode 100644 index 00000000..cf5683d4 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java @@ -0,0 +1,15 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class MaterialSample { + + private static final OntologyClass ABNORMAL_SAMPLE = OntologyClassBuilder.ontologyClass("EFO:0009655", "abnormal sample"); + private static final OntologyClass REFERENCE_SAMPLE = OntologyClassBuilder.ontologyClass("EFO:0009654", "reference sample"); + + + public static OntologyClass abnormalSample() { return ABNORMAL_SAMPLE; } + public static OntologyClass referenceSample() { return REFERENCE_SAMPLE; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java 
b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java new file mode 100644 index 00000000..373d37d3 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java @@ -0,0 +1,17 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class TumorProgression { + + private static final OntologyClass PRIMARY_NEOPLASM = OntologyClassBuilder.ontologyClass("NCIT:C8509", "Primary Neoplasm"); + private static final OntologyClass METASTATIC_NEOPLASM = OntologyClassBuilder.ontologyClass("NCIT:C3261", "Metastatic Neoplasm"); + private static final OntologyClass RECURRENT_NEOPLASM = OntologyClassBuilder.ontologyClass("NCIT:C4798", "Recurrent Neoplasm"); + + + public static OntologyClass primaryNeoplasm() { return PRIMARY_NEOPLASM; } + public static OntologyClass metastaticNeoplasm() { return METASTATIC_NEOPLASM; } + public static OntologyClass recurrentNeoplasm() { return RECURRENT_NEOPLASM; } + +} From 8d7008ffd55f3f508463cbe7312c42d06e9648e0 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 2 Nov 2022 13:06:12 -0400 Subject: [PATCH 40/79] Draft `PhenopacketPrinter` API, write prototype `PhenopacketPrinter`s for Protobuf, JSON, and YAML. 
Signed-off-by: Daniel Danis --- phenopacket-tools-io/pom.xml | 11 +++-- .../src/main/java/module-info.java | 2 + .../phenopackettools/io/JsonPrinter.java | 31 ++++++++++++ .../phenopackettools/io/NaiveYamlPrinter.java | 48 +++++++++++++++++++ .../io/PhenopacketPrinter.java | 20 ++++++++ .../io/PhenopacketPrinterFactory.java | 18 +++++++ .../PhenopacketPrinterFactoryException.java | 26 ++++++++++ .../io/PhenopacketPrinterFactoryImpl.java | 23 +++++++++ .../io/NaiveYamlPrinterTest.java | 36 ++++++++++++++ 9 files changed, 211 insertions(+), 4 deletions(-) create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryException.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java create mode 100644 phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java diff --git a/phenopacket-tools-io/pom.xml b/phenopacket-tools-io/pom.xml index cd4817f9..7a33fdfa 100644 --- a/phenopacket-tools-io/pom.xml +++ b/phenopacket-tools-io/pom.xml @@ -29,10 +29,6 @@ com.fasterxml.jackson.dataformat jackson-dataformat-yaml - - org.yaml - snakeyaml - com.google.protobuf protobuf-java @@ -41,6 +37,13 @@ com.google.protobuf protobuf-java-util + + + org.phenopackets.phenopackettools + phenopacket-tools-test + ${project.parent.version} + test + \ No newline at end of file diff --git a/phenopacket-tools-io/src/main/java/module-info.java 
b/phenopacket-tools-io/src/main/java/module-info.java index ee92b566..07a662f6 100644 --- a/phenopacket-tools-io/src/main/java/module-info.java +++ b/phenopacket-tools-io/src/main/java/module-info.java @@ -4,6 +4,8 @@ requires org.phenopackets.schema; requires com.google.protobuf; requires com.google.protobuf.util; + requires com.fasterxml.jackson.databind; + requires com.fasterxml.jackson.dataformat.yaml; requires org.slf4j; exports org.phenopackets.phenopackettools.io; diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java new file mode 100644 index 00000000..df55a53d --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java @@ -0,0 +1,31 @@ +package org.phenopackets.phenopackettools.io; + +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; + +class JsonPrinter implements PhenopacketPrinter { + + private static final JsonFormat.Printer PRINTER = JsonFormat.printer(); + + private static final JsonPrinter INSTANCE = new JsonPrinter<>(); + + static JsonPrinter getInstance() { + // We know that JsonFormat can serialize ANY Message, hence the unchecked cast is safe. 
+ + //noinspection unchecked + return (JsonPrinter) INSTANCE; + } + private JsonPrinter() { + } + @Override + public void print(T message, OutputStream os) throws IOException { + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + PRINTER.appendTo(message, writer); + writer.flush(); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java new file mode 100644 index 00000000..2e7b7148 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java @@ -0,0 +1,48 @@ +package org.phenopackets.phenopackettools.io; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.google.protobuf.MessageOrBuilder; +import com.google.protobuf.util.JsonFormat; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * A naive implementation of YAML printer that first prints the {@link MessageOrBuilder} into a JSON string, + * then decodes the string into {@link JsonNode} and prints as YAML document. + *

+ * This is, of course, not efficient. However, it works OK as a prototype printer. + */ +class NaiveYamlPrinter implements PhenopacketPrinter { + + private static final JsonFormat.Printer PB_PRINTER = JsonFormat.printer(); + + private static final NaiveYamlPrinter INSTANCE = new NaiveYamlPrinter<>(); + + static NaiveYamlPrinter getInstance() { + // We know that JsonFormat can serialize ANY Message, hence the unchecked cast is safe. + + //noinspection unchecked + return (NaiveYamlPrinter) INSTANCE; + } + + private final ObjectMapper jsonMapper; + private final ObjectMapper yamlMapper; + + private NaiveYamlPrinter() { + jsonMapper = new ObjectMapper(); + yamlMapper = YAMLMapper.builder() + .disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER) + .build(); + } + + @Override + public void print(T message, OutputStream os) throws IOException { + String jsonString = PB_PRINTER.print(message); + JsonNode jsonNode = jsonMapper.readTree(jsonString); + yamlMapper.writeValue(os, jsonNode); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java new file mode 100644 index 00000000..72a73a86 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java @@ -0,0 +1,20 @@ +package org.phenopackets.phenopackettools.io; + +import com.google.protobuf.MessageOrBuilder; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +public interface PhenopacketPrinter { + + void print(T message, OutputStream os) throws IOException; + + default void print(T message, Path output) throws IOException { + try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(output))) { + print(message, os); + } + } +} diff --git 
a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java new file mode 100644 index 00000000..cb0e92e6 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java @@ -0,0 +1,18 @@ +package org.phenopackets.phenopackettools.io; + +import com.google.protobuf.Message; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; + +public interface PhenopacketPrinterFactory { + + static PhenopacketPrinterFactory getInstance() { + return PhenopacketPrinterFactoryImpl.INSTANCE; + } + + PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, + PhenopacketElement element, + PhenopacketFormat format) throws PhenopacketPrinterFactoryException; + +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryException.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryException.java new file mode 100644 index 00000000..225983cf --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryException.java @@ -0,0 +1,26 @@ +package org.phenopackets.phenopackettools.io; + +import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; + +public class PhenopacketPrinterFactoryException extends PhenopacketToolsRuntimeException { + + public PhenopacketPrinterFactoryException() { + super(); + } + + public PhenopacketPrinterFactoryException(String message) { + super(message); + } + + public PhenopacketPrinterFactoryException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketPrinterFactoryException(Throwable 
cause) { + super(cause); + } + + protected PhenopacketPrinterFactoryException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java new file mode 100644 index 00000000..85bbee64 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java @@ -0,0 +1,23 @@ +package org.phenopackets.phenopackettools.io; + +import com.google.protobuf.Message; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; + +class PhenopacketPrinterFactoryImpl implements PhenopacketPrinterFactory { + + static final PhenopacketPrinterFactoryImpl INSTANCE = new PhenopacketPrinterFactoryImpl(); + + @Override + public PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, + PhenopacketElement element, + PhenopacketFormat format) throws PhenopacketPrinterFactoryException { + return switch (format) { + case PROTOBUF -> Message::writeTo; + case JSON -> JsonPrinter.getInstance(); + case YAML -> NaiveYamlPrinter.getInstance(); + }; + } + +} diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java new file mode 100644 index 00000000..e2b5e51f --- /dev/null +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java @@ -0,0 +1,36 @@ +package org.phenopackets.phenopackettools.io; + +import org.junit.jupiter.api.Disabled; +import 
org.junit.jupiter.api.Test; +import org.phenopackets.phenopackettools.test.TestData; +import org.phenopackets.schema.v1.Cohort; +import org.phenopackets.schema.v1.Family; +import org.phenopackets.schema.v1.Phenopacket; + +import java.nio.file.Path; + +@Disabled +public class NaiveYamlPrinterTest { + + @Test + public void printPhenopacket() throws Exception { + NaiveYamlPrinter printer = NaiveYamlPrinter.getInstance(); + Phenopacket pp = TestData.V1.comprehensivePhenopacket(); + printer.print(pp, Path.of("phenopacket.v1.yaml")); + } + + @Test + public void printFamily() throws Exception { + NaiveYamlPrinter printer = NaiveYamlPrinter.getInstance(); + Family pp = TestData.V1.comprehensiveFamily(); + printer.print(pp, Path.of("family.v1.yaml")); + } + + @Test + public void printCohort() throws Exception { + NaiveYamlPrinter printer = NaiveYamlPrinter.getInstance(); + Cohort pp = TestData.V1.comprehensiveCohort(); + printer.print(pp, Path.of("cohort.v1.yaml")); + } + +} \ No newline at end of file From c64dd08095fdb6ba0a5355cfb6e64f5ff98ab8b4 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 2 Nov 2022 13:32:20 -0400 Subject: [PATCH 41/79] Use `PhenopacketPrinter` in `convert` command. 
Signed-off-by: Daniel Danis --- .../command/ConvertCommand.java | 58 +++++++------------ .../io/PhenopacketPrinterFactory.java | 2 - .../io/PhenopacketPrinterFactoryImpl.java | 2 - 3 files changed, 20 insertions(+), 42 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index 8bb481bc..e5373506 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -1,10 +1,11 @@ package org.phenopackets.phenopackettools.command; import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; import org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter; import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.io.PhenopacketPrinter; +import org.phenopackets.phenopackettools.io.PhenopacketPrinterFactory; import org.phenopackets.schema.v1.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,14 +76,11 @@ protected Integer execute() { converted.add(new MessageAndPath(v2, mp.path())); } - // (3) Set the output format if necessary. - if (convertSection.outputFormat == null) { - LOGGER.info("Output format (--output-format) not provided, writing data in the input format `{}`", inputSection.format); - convertSection.outputFormat = inputSection.format; - } + // (3) Configure the output format. + PhenopacketPrinter printer = configurePhenopacketPrinter(); // (4) Write out the output(s). 
- return writeOutConverted(converted); + return writeOutConverted(converted, printer); } /** @@ -110,7 +108,19 @@ private boolean checkInputArgumentsAreOk() { return true; } - private int writeOutConverted(List converted) { + private PhenopacketPrinter configurePhenopacketPrinter() { + PhenopacketFormat format; + if (convertSection.outputFormat == null) { + LOGGER.info("Output format (--output-format) not provided, writing data in the input format `{}`", inputSection.format); + format = inputSection.format; + } else + format = convertSection.outputFormat; + + PhenopacketPrinterFactory factory = PhenopacketPrinterFactory.getInstance(); + return factory.forFormat(PhenopacketSchemaVersion.V2, format); + } + + private int writeOutConverted(List converted, PhenopacketPrinter printer) { if (converted.size() == 1) { // Writing out item, either from STDIN or from one `-i` options. MessageAndPath mp = converted.get(0); @@ -122,7 +132,7 @@ private int writeOutConverted(List converted) { } else { os = openOutputStream(mp.path()); } - writeMessage(mp.message(), convertSection.outputFormat, os); + printer.print(mp.message(), os); } catch (IOException e) { LOGGER.error("Error while writing out a phenopacket: {}", e.getMessage(), e); return 1; @@ -139,7 +149,7 @@ private int writeOutConverted(List converted) { // Writing out >1 items provided by `-i` options. for (MessageAndPath mp : converted) { try (OutputStream os = openOutputStream(mp.path())) { - writeMessage(mp.message(), convertSection.outputFormat, os); + printer.print(mp.message(), os); } catch (IOException e) { LOGGER.error("Error while writing out a phenopacket: {}", e.getMessage(), e); return 1; @@ -169,32 +179,4 @@ private BufferedOutputStream openOutputStream(Path inputPath) throws IOException return new BufferedOutputStream(Files.newOutputStream(output)); } - /** - * Write the {@code message} in an appropriate {@code format} into the provided {@link OutputStream} {@code os}. - *

- * Uses {@link } - * @param message message to be written out. - * @param format format to write out - * @param os where to write - * @throws IOException in case of I/O errors during the output - */ - protected static void writeMessage(Message message, PhenopacketFormat format, OutputStream os) throws IOException { - switch (format) { - case PROTOBUF -> { - LOGGER.debug("Writing protobuf message"); - message.writeTo(os); - } - case JSON -> { - LOGGER.debug("Writing JSON message"); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); - JsonFormat.printer().appendTo(message, writer); - writer.flush(); - } - case YAML -> { - // TODO - implement - throw new RuntimeException("YAML printer is not yet implemented"); - } - } - } - } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java index cb0e92e6..69cb22bb 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java @@ -1,7 +1,6 @@ package org.phenopackets.phenopackettools.io; import com.google.protobuf.Message; -import org.phenopackets.phenopackettools.core.PhenopacketElement; import org.phenopackets.phenopackettools.core.PhenopacketFormat; import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; @@ -12,7 +11,6 @@ static PhenopacketPrinterFactory getInstance() { } PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, - PhenopacketElement element, PhenopacketFormat format) throws PhenopacketPrinterFactoryException; } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java 
index 85bbee64..3fd23f2f 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java @@ -1,7 +1,6 @@ package org.phenopackets.phenopackettools.io; import com.google.protobuf.Message; -import org.phenopackets.phenopackettools.core.PhenopacketElement; import org.phenopackets.phenopackettools.core.PhenopacketFormat; import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; @@ -11,7 +10,6 @@ class PhenopacketPrinterFactoryImpl implements PhenopacketPrinterFactory { @Override public PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, - PhenopacketElement element, PhenopacketFormat format) throws PhenopacketPrinterFactoryException { return switch (format) { case PROTOBUF -> Message::writeTo; From 015546eb2c919c8f9f2c845c841b9e303b31d83c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 2 Nov 2022 15:02:41 -0400 Subject: [PATCH 42/79] Implement naive YAML parser. 
Signed-off-by: Daniel Danis --- .../io/base/BasePhenopacketParser.java | 6 +- .../io/base/NaiveYamlParser.java | 33 +++ .../io/v1/V1PhenopacketParser.java | 5 - .../io/v2/V2PhenopacketParser.java | 5 - .../io/v1/V1PhenopacketParserTest.java | 8 +- .../io/v2/V2PhenopacketParserTest.java | 7 +- .../phenopackettools/io/v1/cohort.yaml | 196 +++++++++++++++ .../phenopackettools/io/v1/family.yaml | 209 ++++++++++++++++ .../phenopackettools/io/v1/phenopacket.yaml | 146 +++++++++++ .../phenopackettools/io/v2/cohort.yaml | 222 +++++++++++++++++ .../phenopackettools/io/v2/family.yaml | 235 ++++++++++++++++++ .../phenopackettools/io/v2/phenopacket.yaml | 165 ++++++++++++ 12 files changed, 1218 insertions(+), 19 deletions(-) create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.yaml create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.yaml create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.yaml create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.yaml create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.yaml create mode 100644 phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.yaml diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java index b435d3d1..4383228d 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/BasePhenopacketParser.java @@ -47,5 
+47,9 @@ private Message readJsonMessage(PhenopacketElement element, InputStream is) thro protected abstract Message.Builder prepareBuilder(PhenopacketElement element); - protected abstract Message readYamlMessage(PhenopacketElement element, InputStream is) throws IOException; + private Message readYamlMessage(PhenopacketElement element, InputStream is) throws IOException { + Message.Builder builder = prepareBuilder(element); + NaiveYamlParser.INSTANCE.deserializeYamlMessage(is, builder); + return builder.build(); + } } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java new file mode 100644 index 00000000..c84a0028 --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/base/NaiveYamlParser.java @@ -0,0 +1,33 @@ +package org.phenopackets.phenopackettools.io.base; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A naive and inefficient implementation of YAML -> {@link Message} parsing that first maps YAML into JSON String + * and then decodes the JSON into {@link Message}. 
+ */ +class NaiveYamlParser { + + private static final JsonFormat.Parser JSON_PARSER = JsonFormat.parser(); + + static final NaiveYamlParser INSTANCE = new NaiveYamlParser(); + private final ObjectMapper yamlMapper; + private final ObjectMapper jsonMapper; + private NaiveYamlParser() { + yamlMapper = new YAMLMapper(); + jsonMapper = new ObjectMapper(); + } + + void deserializeYamlMessage(InputStream is, Message.Builder builder) throws IOException { + JsonNode node = yamlMapper.readTree(is); + String jsonString = jsonMapper.writeValueAsString(node); + JSON_PARSER.merge(jsonString, builder); + } +} diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java index 44647884..3d4ab64b 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParser.java @@ -23,11 +23,6 @@ protected Message readProtobufMessage(PhenopacketElement element, InputStream is }; } - @Override - protected Message readYamlMessage(PhenopacketElement element, InputStream is) throws IOException { - throw new RuntimeException("Not yet implemented"); // TODO - implement - } - @Override protected Message.Builder prepareBuilder(PhenopacketElement element) { return switch (element) { diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java index 8221523f..1ace731d 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParser.java @@ -23,11 +23,6 @@ protected Message 
readProtobufMessage(PhenopacketElement element, InputStream is }; } - @Override - protected Message readYamlMessage(PhenopacketElement element, InputStream is) throws IOException { - throw new RuntimeException("Not yet implemented"); // TODO - implement - } - @Override protected Message.Builder prepareBuilder(PhenopacketElement element) { return switch (element) { diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java index 749a843c..86fb9ed6 100644 --- a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v1/V1PhenopacketParserTest.java @@ -20,6 +20,7 @@ public class V1PhenopacketParserTest { private static final Path BASE = TestBase.BASE_DIR.resolve("v1"); + private PhenopacketParser parser; @BeforeEach @@ -35,10 +36,9 @@ public void setUp() { " JSON, PHENOPACKET, phenopacket.json", " JSON, FAMILY, family.json", " JSON, COHORT, cohort.json", - // TODO - finalize once we settle down on the YAML format -// " YAML, PHENOPACKET, phenopacket.yaml", -// " YAML, FAMILY, family.yaml", -// " YAML, COHORT, cohort.yaml", + " YAML, PHENOPACKET, phenopacket.yaml", + " YAML, FAMILY, family.yaml", + " YAML, COHORT, cohort.yaml", }) public void weGetExpectedClassForGivenFormatAndElement(PhenopacketFormat format, PhenopacketElement element, diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java index c0eddb78..41ed246d 100644 --- a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java +++ 
b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/v2/V2PhenopacketParserTest.java @@ -36,10 +36,9 @@ public void setUp() { " JSON, PHENOPACKET, phenopacket.json", " JSON, FAMILY, family.json", " JSON, COHORT, cohort.json", - // TODO - finalize once we settle down on the YAML format -// " YAML, PHENOPACKET, phenopacket.yaml", -// " YAML, FAMILY, family.yaml", -// " YAML, COHORT, cohort.yaml", + " YAML, PHENOPACKET, phenopacket.yaml", + " YAML, FAMILY, family.yaml", + " YAML, COHORT, cohort.yaml", }) public void weGetExpectedClassForGivenFormatAndElement(PhenopacketFormat format, PhenopacketElement element, diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.yaml new file mode 100644 index 00000000..5f9ac9c2 --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/cohort.yaml @@ -0,0 +1,196 @@ +id: "comprehensive-cohort-id" +description: "A description of the example cohort." +members: +- id: "comprehensive-phenopacket-id" + subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + ageAtCollection: + age: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + phenotypicFeatures: + - type: + id: "HP:0001558" + label: "Decreased fetal movement" + classOfOnset: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+ - type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + negated: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + ageOfOnset: + age: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + classOfOnset: + id: "HP:0011463" + label: "Childhood onset" + biosamples: + - id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + ageOfIndividualAtCollection: + age: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + genes: + - id: "HGNC1:3688" + symbol: "FGFR1" + variants: + - hgvsAllele: + hgvs: "NM_001848.2:c.877G>A" + zygosity: + id: "GENO:0000135" + label: "heterozygous" + diseases: + - term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + classOfOnset: + id: "HP:0003577" + label: "Congenital onset" + htsFiles: + - uri: "file://data/genomes/P000001C" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" + 
metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- subject: + id: "MOTHER" + sex: "FEMALE" +- subject: + id: "FATHER" + sex: "MALE" +htsFiles: +- uri: "file://data/genomes/FAM000001" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" + MOTHER: "P000001M" + FATHER: "P000001F" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.yaml new file mode 100644 index 00000000..02f6a92c --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/family.yaml @@ -0,0 +1,209 @@ +id: "comprehensive-family-id" +proband: + id: "comprehensive-phenopacket-id" + subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + ageAtCollection: + age: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + phenotypicFeatures: + - type: + id: "HP:0001558" + label: "Decreased fetal movement" + classOfOnset: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with 
recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + negated: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + ageOfOnset: + age: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + classOfOnset: + id: "HP:0011463" + label: "Childhood onset" + biosamples: + - id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + ageOfIndividualAtCollection: + age: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + genes: + - id: "HGNC1:3688" + symbol: "FGFR1" + variants: + - hgvsAllele: + hgvs: "NM_001848.2:c.877G>A" + zygosity: + id: "GENO:0000135" + label: "heterozygous" + diseases: + - term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + classOfOnset: + id: "HP:0003577" + label: "Congenital onset" + htsFiles: + - uri: "file://data/genomes/P000001C" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + 
individualToSampleIdentifiers: + "14 year-old boy": "P000001C" + metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +relatives: +- subject: + id: "MOTHER" + sex: "FEMALE" +- subject: + id: "FATHER" + sex: "MALE" +pedigree: + persons: + - individualId: "14 year-old boy" + paternalId: "FATHER" + maternalId: "MOTHER" + sex: "MALE" + affectedStatus: "AFFECTED" + - individualId: "MOTHER" + sex: "FEMALE" + affectedStatus: "UNAFFECTED" + - individualId: "FATHER" + sex: "MALE" + affectedStatus: "UNAFFECTED" +htsFiles: +- uri: "file://data/genomes/FAM000001" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" + MOTHER: "P000001M" + FATHER: "P000001F" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.yaml new file mode 100644 index 00000000..bafcc88b --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v1/phenopacket.yaml @@ -0,0 +1,146 @@ +id: "comprehensive-phenopacket-id" +subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + ageAtCollection: + age: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" +phenotypicFeatures: +- type: + id: "HP:0001558" + label: "Decreased fetal movement" + classOfOnset: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case 
report." +- type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + negated: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + ageOfOnset: + age: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + classOfOnset: + id: "HP:0011463" + label: "Childhood onset" +biosamples: +- id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + ageOfIndividualAtCollection: + age: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" +genes: +- id: "HGNC1:3688" + symbol: "FGFR1" +variants: +- hgvsAllele: + hgvs: "NM_001848.2:c.877G>A" + zygosity: + id: "GENO:0000135" + label: "heterozygous" +diseases: +- term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + classOfOnset: + id: "HP:0003577" + label: "Congenital onset" +htsFiles: +- uri: "file://data/genomes/P000001C" + description: "Whole genome sequencing VCF output" + htsFormat: "VCF" + genomeAssembly: "GRCh38.p13" + individualToSampleIdentifiers: + "14 year-old boy": "P000001C" +metaData: + 
created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "1.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.yaml new file mode 100644 index 00000000..55e7969d --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/cohort.yaml @@ -0,0 +1,222 @@ +id: "comprehensive-cohort-id" +description: "A description of the example cohort." 
+members: +- id: "comprehensive-phenopacket-id" + subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + timeAtLastEncounter: + age: + iso8601duration: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + phenotypicFeatures: + - type: + id: "HP:0001558" + label: "Decreased fetal movement" + onset: + ontologyClass: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + excluded: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + onset: + age: + iso8601duration: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+ - type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + onset: + ontologyClass: + id: "HP:0011463" + label: "Childhood onset" + biosamples: + - id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + timeOfCollection: + age: + iso8601duration: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + materialSample: + id: "EFO:0009655" + label: "abnormal sample" + interpretations: + - id: "comprehensive-phenopacket-id" + progressStatus: "SOLVED" + diagnosis: + disease: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + genomicInterpretations: + - subjectOrBiosampleId: "14 year-old boy" + interpretationStatus: "CAUSATIVE" + variantInterpretation: + variationDescriptor: + expressions: + - syntax: "hgvs" + value: "NM_001848.2:c.877G>A" + allelicState: + id: "GENO:0000135" + label: "heterozygous" + diseases: + - term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + onset: + ontologyClass: + id: "HP:0003577" + label: "Congenital onset" + files: + - uri: "file://data/genomes/P000001C" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" + metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- subject: + id: "MOTHER" + dateOfBirth: "1970-01-01T00:00:00Z" + timeAtLastEncounter: {} + sex: "FEMALE" + taxonomy: {} +- subject: + id: "FATHER" + dateOfBirth: "1970-01-01T00:00:00Z" + timeAtLastEncounter: {} + sex: "MALE" + taxonomy: {} +files: +- uri: "file://data/genomes/FAM000001" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + MOTHER: "P000001M" + FATHER: "P000001F" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.yaml new file mode 100644 index 00000000..02e2b78e --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/family.yaml @@ -0,0 +1,235 @@ +id: "comprehensive-family-id" +proband: + id: "comprehensive-phenopacket-id" + subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + timeAtLastEncounter: + age: + iso8601duration: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + phenotypicFeatures: + - type: + id: "HP:0001558" + label: "Decreased fetal movement" + onset: + ontologyClass: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation 
leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + excluded: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + onset: + age: + iso8601duration: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." + - type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + onset: + ontologyClass: + id: "HP:0011463" + label: "Childhood onset" + biosamples: + - id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + timeOfCollection: + age: + iso8601duration: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + materialSample: + id: "EFO:0009655" + label: "abnormal sample" + interpretations: + - id: "comprehensive-phenopacket-id" + progressStatus: "SOLVED" + diagnosis: + disease: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + genomicInterpretations: + - subjectOrBiosampleId: "14 year-old boy" + interpretationStatus: "CAUSATIVE" + variantInterpretation: + variationDescriptor: + expressions: + - syntax: "hgvs" + 
value: "NM_001848.2:c.877G>A" + allelicState: + id: "GENO:0000135" + label: "heterozygous" + diseases: + - term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + onset: + ontologyClass: + id: "HP:0003577" + label: "Congenital onset" + files: + - uri: "file://data/genomes/P000001C" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" + metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+relatives: +- subject: + id: "MOTHER" + dateOfBirth: "1970-01-01T00:00:00Z" + timeAtLastEncounter: {} + sex: "FEMALE" + taxonomy: {} +- subject: + id: "FATHER" + dateOfBirth: "1970-01-01T00:00:00Z" + timeAtLastEncounter: {} + sex: "MALE" + taxonomy: {} +pedigree: + persons: + - individualId: "14 year-old boy" + paternalId: "FATHER" + maternalId: "MOTHER" + sex: "MALE" + affectedStatus: "AFFECTED" + - individualId: "MOTHER" + sex: "FEMALE" + affectedStatus: "UNAFFECTED" + - individualId: "FATHER" + sex: "MALE" + affectedStatus: "UNAFFECTED" +files: +- uri: "file://data/genomes/FAM000001" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + MOTHER: "P000001M" + FATHER: "P000001F" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." + submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
diff --git a/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.yaml b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.yaml new file mode 100644 index 00000000..2a5986de --- /dev/null +++ b/phenopacket-tools-io/src/test/resources/org/phenopackets/phenopackettools/io/v2/phenopacket.yaml @@ -0,0 +1,165 @@ +id: "comprehensive-phenopacket-id" +subject: + id: "14 year-old boy" + alternateIds: + - "boy" + - "patient" + - "proband" + dateOfBirth: "1970-01-02T10:17:36.000000100Z" + timeAtLastEncounter: + age: + iso8601duration: "P14Y" + sex: "MALE" + karyotypicSex: "XY" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" +phenotypicFeatures: +- type: + id: "HP:0001558" + label: "Decreased fetal movement" + onset: + ontologyClass: + id: "HP:0011461" + label: "Fetal onset" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0031910" + label: "Abnormal cranial nerve physiology" + excluded: true + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." +- type: + id: "HP:0011463" + label: "Macroscopic hematuria" + modifiers: + - id: "HP:0031796" + label: "Recurrent" + onset: + age: + iso8601duration: "P14Y" + evidence: + - evidenceCode: + id: "ECO:0000033" + label: "author statement supported by traceable reference" + reference: + id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." 
+- type: + id: "HP:0001270" + label: "Motor delay" + severity: + id: "HP:0012825" + label: "Mild" + onset: + ontologyClass: + id: "HP:0011463" + label: "Childhood onset" +biosamples: +- id: "biosample-id" + individualId: "14 year-old boy" + description: "Muscle biopsy of 14 year-old boy" + sampledTissue: + id: "UBERON:0003403" + label: "skin of forearm" + taxonomy: + id: "NCBITaxon:9606" + label: "homo sapiens" + timeOfCollection: + age: + iso8601duration: "P14Y" + histologicalDiagnosis: + id: "NCIT:C38757" + label: "Negative Finding" + tumorProgression: + id: "NCIT:C3677" + label: "Benign Neoplasm" + tumorGrade: + id: "NCIT:C28076" + label: "Disease Grade Qualifier" + diagnosticMarkers: + - id: "NCIT:C68748" + label: "HER2/Neu Positive" + materialSample: + id: "EFO:0009655" + label: "abnormal sample" +interpretations: +- id: "comprehensive-phenopacket-id" + progressStatus: "SOLVED" + diagnosis: + disease: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + genomicInterpretations: + - subjectOrBiosampleId: "14 year-old boy" + interpretationStatus: "CAUSATIVE" + variantInterpretation: + variationDescriptor: + expressions: + - syntax: "hgvs" + value: "NM_001848.2:c.877G>A" + allelicState: + id: "GENO:0000135" + label: "heterozygous" +diseases: +- term: + id: "OMIM:101600" + label: "PFEIFFER SYNDROME" + onset: + ontologyClass: + id: "HP:0003577" + label: "Congenital onset" +files: +- uri: "file://data/genomes/P000001C" + individualToFileIdentifiers: + "14 year-old boy": "P000001C" + fileAttributes: + genomeAssembly: "GRCh38.p13" + fileFormat: "vcf" + description: "Whole genome sequencing VCF output" +metaData: + created: "2022-10-03T16:39:04.000123456Z" + createdBy: "Peter R." 
+ submittedBy: "PhenopacketLab" + resources: + - id: "hp" + name: "human phenotype ontology" + url: "http://purl.obolibrary.org/obo/hp.owl" + version: "2018-03-08" + namespacePrefix: "HP" + iriPrefix: "http://purl.obolibrary.org/obo/HP_" + - id: "geno" + name: "Genotype Ontology" + url: "http://purl.obolibrary.org/obo/geno.owl" + version: "19-03-2018" + namespacePrefix: "GENO" + iriPrefix: "http://purl.obolibrary.org/obo/GENO_" + - id: "pubmed" + name: "PubMed" + namespacePrefix: "PMID" + iriPrefix: "https://www.ncbi.nlm.nih.gov/pubmed/" + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "20-03-2020" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + phenopacketSchemaVersion: "2.0.0" + externalReferences: + - id: "PMID:30808312" + description: "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria:\ + \ a case report." From 41fd763fccc0f9d50d0dcb75d4961aee8420ecf9 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 2 Nov 2022 15:34:44 -0400 Subject: [PATCH 43/79] Use `phenopacket-tools-io` in CLI. 
Signed-off-by: Daniel Danis --- .../command/ConvertCommand.java | 6 +- .../command/ExamplesCommand.java | 107 +++++++----------- .../phenopackettools/io/JsonPrinter.java | 15 ++- .../phenopackettools/io/NaiveYamlPrinter.java | 15 ++- .../io/PhenopacketPrinter.java | 11 +- .../io/PhenopacketPrinterFactory.java | 9 +- .../io/PhenopacketPrinterFactoryImpl.java | 3 +- .../io/NaiveYamlPrinterTest.java | 5 +- 8 files changed, 72 insertions(+), 99 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index e5373506..74601ea7 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -77,7 +77,7 @@ protected Integer execute() { } // (3) Configure the output format. - PhenopacketPrinter printer = configurePhenopacketPrinter(); + PhenopacketPrinter printer = configurePhenopacketPrinter(); // (4) Write out the output(s). return writeOutConverted(converted, printer); @@ -108,7 +108,7 @@ private boolean checkInputArgumentsAreOk() { return true; } - private PhenopacketPrinter configurePhenopacketPrinter() { + private PhenopacketPrinter configurePhenopacketPrinter() { PhenopacketFormat format; if (convertSection.outputFormat == null) { LOGGER.info("Output format (--output-format) not provided, writing data in the input format `{}`", inputSection.format); @@ -120,7 +120,7 @@ private PhenopacketPrinter configurePhenopacketPrinter() { return factory.forFormat(PhenopacketSchemaVersion.V2, format); } - private int writeOutConverted(List converted, PhenopacketPrinter printer) { + private int writeOutConverted(List converted, PhenopacketPrinter printer) { if (converted.size() == 1) { // Writing out item, either from STDIN or from one `-i` options. 
MessageAndPath mp = converted.get(0); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java index 69b3fc0d..ef2d31c9 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java @@ -1,21 +1,17 @@ package org.phenopackets.phenopackettools.command; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.core.PhenopacketFormat; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; import org.phenopackets.phenopackettools.examples.*; +import org.phenopackets.phenopackettools.io.PhenopacketPrinter; +import org.phenopackets.phenopackettools.io.PhenopacketPrinterFactory; import picocli.CommandLine; import picocli.CommandLine.Command; -import java.io.BufferedWriter; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; @@ -29,6 +25,14 @@ public class ExamplesCommand extends BaseCommand { description = "Output directory (default: ${DEFAULT-VALUE})") public Path output = Path.of("."); + private final PhenopacketPrinter jsonPrinter; + private final PhenopacketPrinter yamlPrinter; + + public ExamplesCommand() { + PhenopacketPrinterFactory factory = PhenopacketPrinterFactory.getInstance(); + jsonPrinter = factory.forFormat(PhenopacketSchemaVersion.V2, 
PhenopacketFormat.JSON); + yamlPrinter = factory.forFormat(PhenopacketSchemaVersion.V2, PhenopacketFormat.YAML); + } @Override protected Integer execute() { @@ -38,22 +42,22 @@ protected Integer execute() { Path cohortDir = createADirectoryIfDoesNotExist(output.resolve("cohorts")); // Phenopackets - output(new AtaxiaWithVitaminEdeficiency().getPhenopacket(), phenopacketDir, "AVED"); - output(new BethlehamMyopathy().getPhenopacket(), phenopacketDir, "bethleham-myopathy"); - output(new Holoprosencephaly5().getPhenopacket(), phenopacketDir, "holoprosencephaly5"); - output(new Marfan().getPhenopacket(), phenopacketDir, "marfan"); - output(new NemalineMyopathyPrenatal().getPhenopacket(), phenopacketDir, "nemalineMyopathy"); - output(new Pseudoexfoliation().getPhenopacket(), phenopacketDir, "pseudoexfoliation"); - output(new DuchenneExon51Deletion().getPhenopacket(), phenopacketDir, "duchenne"); - output(new SquamousCellCancer().getPhenopacket(), phenopacketDir, "squamous-cell-esophageal-carcinoma"); - output(new UrothelialCancer().getPhenopacket(), phenopacketDir, "urothelial-cancer"); - output(new Covid().getPhenopacket(), phenopacketDir, "covid"); - output(new Retinoblastoma().getPhenopacket(), phenopacketDir, "retinoblastoma"); - output(new WarburgMicroSyndrome().getPhenopacket(), phenopacketDir, "warburg-micro-syndrome"); - output(new SevereStatinInducedAutoimmuneMyopathy().getPhenopacket(), phenopacketDir, "statin-myopathy"); + printJsonAndYaml(new AtaxiaWithVitaminEdeficiency().getPhenopacket(), phenopacketDir, "AVED"); + printJsonAndYaml(new BethlehamMyopathy().getPhenopacket(), phenopacketDir, "bethleham-myopathy"); + printJsonAndYaml(new Holoprosencephaly5().getPhenopacket(), phenopacketDir, "holoprosencephaly5"); + printJsonAndYaml(new Marfan().getPhenopacket(), phenopacketDir, "marfan"); + printJsonAndYaml(new NemalineMyopathyPrenatal().getPhenopacket(), phenopacketDir, "nemalineMyopathy"); + printJsonAndYaml(new Pseudoexfoliation().getPhenopacket(), 
phenopacketDir, "pseudoexfoliation"); + printJsonAndYaml(new DuchenneExon51Deletion().getPhenopacket(), phenopacketDir, "duchenne"); + printJsonAndYaml(new SquamousCellCancer().getPhenopacket(), phenopacketDir, "squamous-cell-esophageal-carcinoma"); + printJsonAndYaml(new UrothelialCancer().getPhenopacket(), phenopacketDir, "urothelial-cancer"); + printJsonAndYaml(new Covid().getPhenopacket(), phenopacketDir, "covid"); + printJsonAndYaml(new Retinoblastoma().getPhenopacket(), phenopacketDir, "retinoblastoma"); + printJsonAndYaml(new WarburgMicroSyndrome().getPhenopacket(), phenopacketDir, "warburg-micro-syndrome"); + printJsonAndYaml(new SevereStatinInducedAutoimmuneMyopathy().getPhenopacket(), phenopacketDir, "statin-myopathy"); // Families - outputFamily(new FamilyWithPedigree().getFamily(), familyDir, "family"); + printJsonAndYaml(new FamilyWithPedigree().getFamily(), familyDir, "family"); // Cohorts // TODO - write a cohort @@ -71,61 +75,28 @@ private static Path createADirectoryIfDoesNotExist(Path path) throws IOException : Files.createDirectories(path); } - private static void output(Message phenopacket, Path outDir, String basename) { - String yamlName = basename + ".yml"; - outputYamlPhenopacket(phenopacket, outDir, yamlName); - String jsonName = basename + ".json"; - outputPhenopacket(phenopacket, outDir, jsonName); - } - - private static void outputPhenopacket(Message phenopacket, Path outdir, String fileName) { - outputJsonMessage(phenopacket, outdir, fileName); - } - - private static void outputYamlPhenopacket(Message phenopacket, Path outdir, String fileName) { - outputYamlMessage(phenopacket, outdir, fileName, "phenopacket"); - - } - - private static void outputFamily(Message family, Path outDir, String basename) { - String yamlName = basename + ".yml"; - outputYamlFamily(family, outDir, yamlName); - String jsonName = basename + ".json"; - outputJsonFamily(family, outDir,jsonName); - } - - private static void outputJsonFamily(Message family, Path 
outDir, String jsonName) { - outputJsonMessage(family, outDir, jsonName); - } + private void printJsonAndYaml(Message message, Path outDir, String basename) { + Path jsonPath = outDir.resolve(basename + ".json"); + printJsonMessage(message, jsonPath); - private static void outputYamlFamily(Message family, Path outDir, String yamlName) { - outputYamlMessage(family, outDir, yamlName, "family"); + Path yamlPath = outDir.resolve(basename + ".yml"); + printYamlMessage(message, yamlPath); } - private static void outputJsonMessage(Message message, Path outDir, String fileName) { - Path path = outDir.resolve(fileName); - try (BufferedWriter writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) { - String json = JsonFormat.printer().print(message); - writer.write(json); + private void printJsonMessage(Message message, Path path) { + try { + jsonPrinter.print(message, path); } catch (IOException e) { - throw new PhenopacketToolsRuntimeException(e.getMessage()); + throw new PhenopacketToolsRuntimeException(e); } } - private static void outputYamlMessage(Message family, Path outDir, String yamlName, String messageName) { - Path path = outDir.resolve(yamlName); - ObjectMapper mapper = new ObjectMapper(new YAMLFactory().disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER)); - try (BufferedWriter writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) { - String jsonString = JsonFormat.printer().print(family); - JsonNode jsonNodeTree = new ObjectMapper().readTree(jsonString); - JsonNode node = JsonNodeFactory.instance.objectNode().set(messageName, jsonNodeTree); - mapper.writeValue(writer, node); + private void printYamlMessage(Message message, Path path) { + try { + yamlPrinter.print(message, path); } catch (IOException e) { - throw new PhenopacketToolsRuntimeException(e.getMessage()); + throw new PhenopacketToolsRuntimeException(e); } } - - - } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java 
b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java index df55a53d..3e6825d6 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/JsonPrinter.java @@ -8,22 +8,21 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; -class JsonPrinter implements PhenopacketPrinter { +class JsonPrinter implements PhenopacketPrinter { private static final JsonFormat.Printer PRINTER = JsonFormat.printer(); - private static final JsonPrinter INSTANCE = new JsonPrinter<>(); + private static final JsonPrinter INSTANCE = new JsonPrinter(); - static JsonPrinter getInstance() { - // We know that JsonFormat can serialize ANY Message, hence the unchecked cast is safe. - - //noinspection unchecked - return (JsonPrinter) INSTANCE; + static JsonPrinter getInstance() { + return INSTANCE; } + private JsonPrinter() { } + @Override - public void print(T message, OutputStream os) throws IOException { + public void print(Message message, OutputStream os) throws IOException { BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); PRINTER.appendTo(message, writer); writer.flush(); diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java index 2e7b7148..9ebc7a0b 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinter.java @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.google.protobuf.Message; import com.google.protobuf.MessageOrBuilder; import 
com.google.protobuf.util.JsonFormat; @@ -16,17 +17,14 @@ *

* This is, of course, not efficient. However, it works OK as a prototype printer. */ -class NaiveYamlPrinter implements PhenopacketPrinter { +class NaiveYamlPrinter implements PhenopacketPrinter { private static final JsonFormat.Printer PB_PRINTER = JsonFormat.printer(); - private static final NaiveYamlPrinter INSTANCE = new NaiveYamlPrinter<>(); + private static final NaiveYamlPrinter INSTANCE = new NaiveYamlPrinter(); - static NaiveYamlPrinter getInstance() { - // We know that JsonFormat can serialize ANY Message, hence the unchecked cast is safe. - - //noinspection unchecked - return (NaiveYamlPrinter) INSTANCE; + static NaiveYamlPrinter getInstance() { + return INSTANCE; } private final ObjectMapper jsonMapper; @@ -40,9 +38,10 @@ private NaiveYamlPrinter() { } @Override - public void print(T message, OutputStream os) throws IOException { + public void print(Message message, OutputStream os) throws IOException { String jsonString = PB_PRINTER.print(message); JsonNode jsonNode = jsonMapper.readTree(jsonString); yamlMapper.writeValue(os, jsonNode); } + } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java index 72a73a86..878f44fc 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinter.java @@ -1,6 +1,6 @@ package org.phenopackets.phenopackettools.io; -import com.google.protobuf.MessageOrBuilder; +import com.google.protobuf.Message; import java.io.BufferedOutputStream; import java.io.IOException; @@ -8,11 +8,14 @@ import java.nio.file.Files; import java.nio.file.Path; -public interface PhenopacketPrinter { +/** + * The implementors can serialize a top-level element of Phenopacket schema into provided {@link OutputStream}. 
+ */ +public interface PhenopacketPrinter { - void print(T message, OutputStream os) throws IOException; + void print(Message message, OutputStream os) throws IOException; - default void print(T message, Path output) throws IOException { + default void print(Message message, Path output) throws IOException { try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(output))) { print(message, os); } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java index 69cb22bb..05a68c01 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactory.java @@ -1,16 +1,19 @@ package org.phenopackets.phenopackettools.io; -import com.google.protobuf.Message; import org.phenopackets.phenopackettools.core.PhenopacketFormat; import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +/** + * The implementors provide {@link PhenopacketPrinter}s for serializing top-level phenopacket elements + * into {@link PhenopacketFormat} using {@link PhenopacketSchemaVersion}. 
+ */ public interface PhenopacketPrinterFactory { static PhenopacketPrinterFactory getInstance() { return PhenopacketPrinterFactoryImpl.INSTANCE; } - PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, - PhenopacketFormat format) throws PhenopacketPrinterFactoryException; + PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, + PhenopacketFormat format) throws PhenopacketPrinterFactoryException; } diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java index 3fd23f2f..55736f6f 100644 --- a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/PhenopacketPrinterFactoryImpl.java @@ -9,8 +9,7 @@ class PhenopacketPrinterFactoryImpl implements PhenopacketPrinterFactory { static final PhenopacketPrinterFactoryImpl INSTANCE = new PhenopacketPrinterFactoryImpl(); @Override - public PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, - PhenopacketFormat format) throws PhenopacketPrinterFactoryException { + public PhenopacketPrinter forFormat(PhenopacketSchemaVersion schemaVersion, PhenopacketFormat format) throws PhenopacketPrinterFactoryException { return switch (format) { case PROTOBUF -> Message::writeTo; case JSON -> JsonPrinter.getInstance(); diff --git a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java index e2b5e51f..55dfc1d0 100644 --- a/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java +++ b/phenopacket-tools-io/src/test/java/org/phenopackets/phenopackettools/io/NaiveYamlPrinterTest.java @@ -12,23 +12,22 @@ @Disabled public 
class NaiveYamlPrinterTest { + private final NaiveYamlPrinter printer = NaiveYamlPrinter.getInstance(); + @Test public void printPhenopacket() throws Exception { - NaiveYamlPrinter printer = NaiveYamlPrinter.getInstance(); Phenopacket pp = TestData.V1.comprehensivePhenopacket(); printer.print(pp, Path.of("phenopacket.v1.yaml")); } @Test public void printFamily() throws Exception { - NaiveYamlPrinter printer = NaiveYamlPrinter.getInstance(); Family pp = TestData.V1.comprehensiveFamily(); printer.print(pp, Path.of("family.v1.yaml")); } @Test public void printCohort() throws Exception { - NaiveYamlPrinter printer = NaiveYamlPrinter.getInstance(); Cohort pp = TestData.V1.comprehensiveCohort(); printer.print(pp, Path.of("cohort.v1.yaml")); } From 971cacf9b9f6c196c60b22eacbe37f5700689e56 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 2 Nov 2022 15:47:17 -0400 Subject: [PATCH 44/79] Check if the organ system is excluded at the beginning as it is a cheap operation. Signed-off-by: Daniel Danis --- .../phenotype/orgsys/AbstractOrganSystemValidator.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java index 255ba160..74a25b18 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java @@ -87,16 +87,17 @@ private Stream checkPhenotypicFeatures(String individualId, Li // and report otherwise. 
organSystemLoop: for (TermId organSystemId : organSystemTermIds) { + // Check if the organ system abnormality has been specifically excluded. + if (featuresByExclusion.excludedPhenotypicFeatures().contains(organSystemId)) + continue; // Yes, it was. Let's check the next organ system + + // Check if we have at least one observed annotation for the organ system. for (TermId pf : featuresByExclusion.observedPhenotypicFeatures()) { if (OntologyAlgorithm.existsPath(hpo, pf, organSystemId)) { continue organSystemLoop; // It only takes one termId to annotate an organ system. } } - // Check if the organ system abnormality has been specifically excluded. - if (featuresByExclusion.excludedPhenotypicFeatures().contains(organSystemId)) - continue; // Yes, it was. Let's check the next organ system - // The organSystemId is neither annotated nor excluded. We report a validation error. Term organSystem = hpo.getTermMap().get(organSystemId); ValidationResult result = ValidationResult.error(VALIDATOR_INFO, From cc57b9dffd055bff0269a5329280d9da1b3936ae Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 19:02:59 -0400 Subject: [PATCH 45/79] Adding DiseaseStage as a Constant --- constants/DiseaseStage.tsv | 11 +++++++ constants/rtd_texts.txt | 1 + docs/constants.rst | 21 +++++++++++++ .../builder/constants/DiseaseStage.java | 31 +++++++++++++++++++ .../builders/OntologyClassBuilderTest.java | 22 +++++++++++++ 5 files changed, 86 insertions(+) create mode 100644 constants/DiseaseStage.tsv create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java create mode 100644 phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/OntologyClassBuilderTest.java diff --git a/constants/DiseaseStage.tsv b/constants/DiseaseStage.tsv new file mode 100644 index 00000000..32aafab0 --- /dev/null +++ b/constants/DiseaseStage.tsv @@ -0,0 +1,11 @@ +ontology.id ontology.label variable.name 
function.name +NCIT:C28051 Stage 0 STAGE_0 stage0 +NCIT:C27966 Stage I STAGE_I stageI +NCIT:C28054 Stage II STAGE_II stageII +NCIT:C27970 Stage III STAGE_III stageIII +NCIT:C27971 Stage IV STAGE_IV stageIV +NCIT:C66904 New York Heart Association Class I NYHA_I nyhaClassI +NCIT:C66905 New York Heart Association Class II NYHA_II nyhaClassII +NCIT:C66907 New York Heart Association Class III NYHA_III nyhaClassIII +NCIT:C7922 New York Heart Association Class III/IV NYHA_III_IV nyhaClassIII_or_IV +NCIT:C66908 New York Heart Association Class IV NYHA_IV nyhaClassIV diff --git a/constants/rtd_texts.txt b/constants/rtd_texts.txt index dd18506d..a9279e26 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -11,6 +11,7 @@ Response|These codes from `NCI Thesaurus `_ codes should be used to specify laboratory test assays. Gender|`LOINC `_ codes should be used to specify self-reported gender. PathologicalTnm|TNM staging performed as part of pathologic specimen (based on surgical specimens including sentinel lymph node biopsy specimens). +DiseaseStage|These codes from `NCI Thesaurus `_ can be used to denote that clinical stage of cancer or heart failure. Other codes should be used for specific diseases with their own clinical stage systems. Severity|Terms from the `HPO `_ are used to describe the severity, defined as the intensity or degree of a manifestation. Evidence|Terms from the `Evidence and Con clusion Ontology ` are used to specify evidence categories. BiospecimenType|Terms from the `NCI Thesaurus `_ are used to denote the source of a biospecimen. diff --git a/docs/constants.rst b/docs/constants.rst index 310d1010..7a70c8f8 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -104,6 +104,27 @@ Terms from the `Evidence and Con clusion Ontology `_ can be used to denote that clinical stage of cancer or heart failure. Other codes should be used for specific diseases with their own clinical stage systems. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C28051", "Stage 0", "stage0()" + "NCIT:C27966", "Stage I", "stageI()" + "NCIT:C28054", "Stage II", "stageII()" + "NCIT:C27970", "Stage III", "stageIII()" + "NCIT:C27971", "Stage IV", "stageIV()" + "NCIT:C66904", "New York Heart Association Class I", "nyhaClassI()" + "NCIT:C66905", "New York Heart Association Class II", "nyhaClassII()" + "NCIT:C66907", "New York Heart Association Class III", "nyhaClassIII()" + "NCIT:C7922", "New York Heart Association Class III/IV", "nyhaClassIII_or_IV()" + "NCIT:C66908", "New York Heart Association Class IV", "nyhaClassIV()" + + Gender ^^^^^^ diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java new file mode 100644 index 00000000..36a6abb2 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java @@ -0,0 +1,31 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class DiseaseStage { + + private static final OntologyClass STAGE_0 = OntologyClassBuilder.ontologyClass("NCIT:C28051", "Stage 0"); + private static final OntologyClass STAGE_I = OntologyClassBuilder.ontologyClass("NCIT:C27966", "Stage I"); + private static final OntologyClass STAGE_II = OntologyClassBuilder.ontologyClass("NCIT:C28054", "Stage II"); + private static final OntologyClass STAGE_III = OntologyClassBuilder.ontologyClass("NCIT:C27970", "Stage III"); + private static final OntologyClass STAGE_IV = OntologyClassBuilder.ontologyClass("NCIT:C27971", "Stage IV"); + private static final OntologyClass NYHA_I = OntologyClassBuilder.ontologyClass("NCIT:C66904", "New York Heart 
Association Class I"); + private static final OntologyClass NYHA_II = OntologyClassBuilder.ontologyClass("NCIT:C66905", "New York Heart Association Class II"); + private static final OntologyClass NYHA_III = OntologyClassBuilder.ontologyClass("NCIT:C66907", "New York Heart Association Class III"); + private static final OntologyClass NYHA_III_IV = OntologyClassBuilder.ontologyClass("NCIT:C7922", "New York Heart Association Class III/IV"); + private static final OntologyClass NYHA_IV = OntologyClassBuilder.ontologyClass("NCIT:C66908", "New York Heart Association Class IV"); + + + public static OntologyClass stage0() { return STAGE_0; } + public static OntologyClass stageI() { return STAGE_I; } + public static OntologyClass stageII() { return STAGE_II; } + public static OntologyClass stageIII() { return STAGE_III; } + public static OntologyClass stageIV() { return STAGE_IV; } + public static OntologyClass nyhaClassI() { return NYHA_I; } + public static OntologyClass nyhaClassII() { return NYHA_II; } + public static OntologyClass nyhaClassIII() { return NYHA_III; } + public static OntologyClass nyhaClassIII_or_IV() { return NYHA_III_IV; } + public static OntologyClass nyhaClassIV() { return NYHA_IV; } + +} diff --git a/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/OntologyClassBuilderTest.java b/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/OntologyClassBuilderTest.java new file mode 100644 index 00000000..657e6dc5 --- /dev/null +++ b/phenopacket-tools-builder/src/test/java/org/phenopackets/phenopackettools/builder/builders/OntologyClassBuilderTest.java @@ -0,0 +1,22 @@ +package org.phenopackets.phenopackettools.builder.builders; + +import org.junit.jupiter.api.Test; +import org.phenopackets.schema.v2.core.Disease; +import org.phenopackets.schema.v2.core.OntologyClass; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import 
static org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder.ontologyClass; + +public class OntologyClassBuilderTest { + + @Test + public void testBuilder() { + OntologyClass longPR1 = OntologyClass.newBuilder() + .setId("HP:0012248") + .setLabel("Prolonged PR interval") + .build(); + OntologyClass longPR2 = ontologyClass("HP:0012248", "Prolonged PR interval"); + assertThat(longPR1, equalTo(longPR2)); + } +} From 3c6fc14e6b8d09be76b3b366573470971ec084cb Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 2 Nov 2022 19:35:37 -0400 Subject: [PATCH 46/79] Adding AdministrationRoute as a Constant --- constants/AdministrationRoute.tsv | 10 ++++ constants/rtd_texts.txt | 3 +- docs/constants.rst | 52 +++++++++++++------ .../constants/AdministrationRoute.java | 29 +++++++++++ 4 files changed, 77 insertions(+), 17 deletions(-) create mode 100644 constants/AdministrationRoute.tsv create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java diff --git a/constants/AdministrationRoute.tsv b/constants/AdministrationRoute.tsv new file mode 100644 index 00000000..7d2dcf13 --- /dev/null +++ b/constants/AdministrationRoute.tsv @@ -0,0 +1,10 @@ +ontology.id ontology.label variable.name function.name +NCIT:C38276 Intravenous Route of Administration INTRAVENOUS_ROUTE intravenous +NCIT:C38222 Intraarterial Route of Administration INTRAARTERIAL_ROUTE intraarterial +NCIT:C183503 Administration via Wound Irrigation WOUND_IRRIGATION_ROUTE woundIrrigation +NCIT:C149695 Nebulizer Route of Administration NEBULIZER_ROUTE nebulizer +NCIT:C38288 Oral Route of Administration ORAL_ROUTE oral +NCIT:C38267 Intrathecal Route of Administration INTRATHECAL_ROUTE intrathecal +NCIT:C38677 Peridural Route of Administration PERIDURAL_ROUTE peridural +NCIT:C38304 Topical Route of Administration TOPICAL_ROUTE topical +NCIT:C38305 Transdermal Route of Administration TRANSDERMAL transdermal diff --git 
a/constants/rtd_texts.txt b/constants/rtd_texts.txt index a9279e26..ad36e1c0 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -18,4 +18,5 @@ BiospecimenType|Terms from the `NCI Thesaurus `_ are used to indicate if a specimen is from the primary tumor, a metastasis or a recurrence. TumorGrade|Terms from the `NCI Thesaurus `_ to describe microscopic appearance of tumor. Grade 1: Well differentiated (low grade); Grade 2: Moderately differentiated (intermediate grade); Grade 3: Poorly differentiated (high grade); Grade 4: Undifferentiated (high grade). DiseaseGrade|Terms from the `NCI Thesaurus `_ to represent the tumor grade. -MaterialSample|Terms from the `EFO `_ to specify the status of the sample. \ No newline at end of file +MaterialSample|Terms from the `EFO `_ to specify the status of the sample. +AdministrationRoute|Terms from the `NCI Thesaurus `_ to represent the way in which a medicinal product is introduced into the body. \ No newline at end of file diff --git a/docs/constants.rst b/docs/constants.rst index 7a70c8f8..a97ac851 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -25,6 +25,26 @@ In contrast, this is the code required with phenopacket-tools (omitting import s The following tables present the available static functions with predefined concepts. +AdministrationRoute +^^^^^^^^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ to represent the way in which a medicinal product is introduced into the body. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C38276", "Intravenous Route of Administration", "intravenous()" + "NCIT:C38222", "Intraarterial Route of Administration", "intraarterial()" + "NCIT:C183503", "Administration via Wound Irrigation", "woundIrrigation()" + "NCIT:C149695", "Nebulizer Route of Administration", "nebulizer()" + "NCIT:C38288", "Oral Route of Administration", "oral()" + "NCIT:C38267", "Intrathecal Route of Administration", "intrathecal()" + "NCIT:C38677", "Peridural Route of Administration", "peridural()" + "NCIT:C38304", "Topical Route of Administration", "topical()" + "NCIT:C38305", "Transdermal Route of Administration", "transdermal()" + + AllelicState ^^^^^^^^^^^^ @@ -88,22 +108,6 @@ Terms from the `NCI Thesaurus `_ to r "NCIT:C28082", "Grade 4", "grade4()" -Evidence -^^^^^^^^ - -Terms from the `Evidence and Con clusion Ontology ` are used to specify evidence categories. - -.. csv-table:: - :header: "id", "label", "function name" - :widths: 30, 200, 200 - - "ECO:0006016", "author statement from published clinical study", "authorStatementFromPublishedClinicalStudy()" - "ECO:0007539", "author statement from published clinical study used in automatic assertion", "authorStatementFromPublishedClinicalStudyAutomaticAssertion()" - "ECO:0006017", "author statement from published clinical study used in manual assertion", "authorStatementFromPublishedClinicalStudyManualAssertion()" - "ECO:0000033", "author statement supported by traceable reference", "authorStatementSupportedByTraceableReference()" - "ECO:0006154", "self-reported patient statement evidence", "selfReportedPatientStatementEvidence()" - - DiseaseStage ^^^^^^^^^^^^ @@ -125,6 +129,22 @@ These codes from `NCI Thesaurus `_ ca "NCIT:C66908", "New York Heart Association Class IV", "nyhaClassIV()" +Evidence +^^^^^^^^ + +Terms from the `Evidence and Con clusion Ontology ` are used to specify evidence categories. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "ECO:0006016", "author statement from published clinical study", "authorStatementFromPublishedClinicalStudy()" + "ECO:0007539", "author statement from published clinical study used in automatic assertion", "authorStatementFromPublishedClinicalStudyAutomaticAssertion()" + "ECO:0006017", "author statement from published clinical study used in manual assertion", "authorStatementFromPublishedClinicalStudyManualAssertion()" + "ECO:0000033", "author statement supported by traceable reference", "authorStatementSupportedByTraceableReference()" + "ECO:0006154", "self-reported patient statement evidence", "selfReportedPatientStatementEvidence()" + + Gender ^^^^^^ diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java new file mode 100644 index 00000000..366c496c --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java @@ -0,0 +1,29 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class AdministrationRoute { + + private static final OntologyClass INTRAVENOUS_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38276", "Intravenous Route of Administration"); + private static final OntologyClass INTRAARTERIAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38222", "Intraarterial Route of Administration"); + private static final OntologyClass WOUND_IRRIGATION_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C183503", "Administration via Wound Irrigation"); + private static final OntologyClass NEBULIZER_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C149695", 
"Nebulizer Route of Administration"); + private static final OntologyClass ORAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38288", "Oral Route of Administration"); + private static final OntologyClass INTRATHECAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38267", "Intrathecal Route of Administration"); + private static final OntologyClass PERIDURAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38677", "Peridural Route of Administration"); + private static final OntologyClass TOPICAL_ROUTE = OntologyClassBuilder.ontologyClass("NCIT:C38304", "Topical Route of Administration"); + private static final OntologyClass TRANSDERMAL = OntologyClassBuilder.ontologyClass("NCIT:C38305", "Transdermal Route of Administration"); + + + public static OntologyClass intravenous() { return INTRAVENOUS_ROUTE; } + public static OntologyClass intraarterial() { return INTRAARTERIAL_ROUTE; } + public static OntologyClass woundIrrigation() { return WOUND_IRRIGATION_ROUTE; } + public static OntologyClass nebulizer() { return NEBULIZER_ROUTE; } + public static OntologyClass oral() { return ORAL_ROUTE; } + public static OntologyClass intrathecal() { return INTRATHECAL_ROUTE; } + public static OntologyClass peridural() { return PERIDURAL_ROUTE; } + public static OntologyClass topical() { return TOPICAL_ROUTE; } + public static OntologyClass transdermal() { return TRANSDERMAL; } + +} From d35bed60f20ee8005c51ddf4c44859369a99add7 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 09:45:16 -0400 Subject: [PATCH 47/79] Create dedicated `cli` package for `cli` classes/packages. 
Signed-off-by: Daniel Danis --- phenopacket-tools-cli/src/main/java/module-info.java | 9 +++------ .../phenopackets/phenopackettools/{ => cli}/Main.java | 8 ++++---- .../phenopackettools/{ => cli}/command/BaseCommand.java | 4 ++-- .../{ => cli}/command/BaseIOCommand.java | 6 +++--- .../{ => cli}/command/ConvertCommand.java | 2 +- .../{ => cli}/command/ExamplesCommand.java | 4 ++-- .../{ => cli}/command/ValidateCommand.java | 8 ++++---- .../{ => cli}/command/validate/BaseValidateCommand.java | 2 +- .../command/validate/ValidateCohortCommand.java | 2 +- .../command/validate/ValidateFamilyCommand.java | 2 +- .../command/validate/ValidatePhenopacketCommand.java | 2 +- .../{ => cli}/examples/AtaxiaWithVitaminEdeficiency.java | 2 +- .../{ => cli}/examples/BethlehamMyopathy.java | 4 +--- .../phenopackettools/{ => cli}/examples/Covid.java | 2 +- .../{ => cli}/examples/DuchenneExon51Deletion.java | 2 +- .../{ => cli}/examples/FamilyWithPedigree.java | 2 +- .../{ => cli}/examples/Holoprosencephaly5.java | 2 +- .../phenopackettools/{ => cli}/examples/Marfan.java | 2 +- .../{ => cli}/examples/NemalineMyopathyPrenatal.java | 2 +- .../{ => cli}/examples/PhenopacketExample.java | 2 +- .../{ => cli}/examples/PneumothoraxSecondaryToCOVID.java | 2 +- .../{ => cli}/examples/Pseudoexfoliation.java | 2 +- .../{ => cli}/examples/Retinoblastoma.java | 2 +- .../examples/SevereStatinInducedAutoimmuneMyopathy.java | 2 +- .../{ => cli}/examples/SquamousCellCancer.java | 2 +- .../{ => cli}/examples/UrothelialCancer.java | 2 +- .../{ => cli}/examples/WarburgMicroSyndrome.java | 2 +- .../{ => cli}/writer/CSVValidationResultsWriter.java | 2 +- .../phenopackettools/{ => cli}/application.properties | 0 .../phenopackets/phenopackettools/{ => cli}/banner.txt | 0 .../command/BaseCommandTest.java} | 4 ++-- 31 files changed, 42 insertions(+), 47 deletions(-) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/Main.java (87%) rename 
phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/BaseCommand.java (96%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/BaseIOCommand.java (96%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/ConvertCommand.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/ExamplesCommand.java (97%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/ValidateCommand.java (96%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/validate/BaseValidateCommand.java (98%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/validate/ValidateCohortCommand.java (95%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/validate/ValidateFamilyCommand.java (95%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/command/validate/ValidatePhenopacketCommand.java (96%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/AtaxiaWithVitaminEdeficiency.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/BethlehamMyopathy.java (97%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/Covid.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/DuchenneExon51Deletion.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/FamilyWithPedigree.java (98%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/Holoprosencephaly5.java (98%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => 
cli}/examples/Marfan.java (97%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/NemalineMyopathyPrenatal.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/PhenopacketExample.java (67%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/PneumothoraxSecondaryToCOVID.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/Pseudoexfoliation.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/Retinoblastoma.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/SevereStatinInducedAutoimmuneMyopathy.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/SquamousCellCancer.java (98%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/UrothelialCancer.java (99%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/examples/WarburgMicroSyndrome.java (98%) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/{ => cli}/writer/CSVValidationResultsWriter.java (98%) rename phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/{ => cli}/application.properties (100%) rename phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/{ => cli}/banner.txt (100%) rename phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/{command/BasePTCommandTest.java => cli/command/BaseCommandTest.java} (78%) diff --git a/phenopacket-tools-cli/src/main/java/module-info.java b/phenopacket-tools-cli/src/main/java/module-info.java index 9ba10f9a..77e84ce8 100644 --- a/phenopacket-tools-cli/src/main/java/module-info.java +++ b/phenopacket-tools-cli/src/main/java/module-info.java @@ -8,14 +8,11 
@@ requires org.monarchinitiative.phenol.core; requires org.monarchinitiative.phenol.io; - requires com.google.protobuf.util; - requires com.fasterxml.jackson.databind; - requires com.fasterxml.jackson.dataformat.yaml; - requires commons.csv; requires info.picocli; + requires commons.csv; requires org.slf4j; requires logback.classic; - opens org.phenopackets.phenopackettools.command to info.picocli; - opens org.phenopackets.phenopackettools.command.validate to info.picocli; + opens org.phenopackets.phenopackettools.cli.command to info.picocli; + opens org.phenopackets.phenopackettools.cli.command.validate to info.picocli; } \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java similarity index 87% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java index d3ea55ab..c3a8f938 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java @@ -1,8 +1,8 @@ -package org.phenopackets.phenopackettools; +package org.phenopackets.phenopackettools.cli; -import org.phenopackets.phenopackettools.command.ValidateCommand; -import org.phenopackets.phenopackettools.command.ConvertCommand; -import org.phenopackets.phenopackettools.command.ExamplesCommand; +import org.phenopackets.phenopackettools.cli.command.ValidateCommand; +import org.phenopackets.phenopackettools.cli.command.ConvertCommand; +import org.phenopackets.phenopackettools.cli.command.ExamplesCommand; import picocli.AutoComplete; import picocli.CommandLine; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseCommand.java similarity index 96% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseCommand.java index be6bd20c..75bce2a4 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseCommand.java @@ -1,8 +1,8 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import ch.qos.logback.classic.Level; import ch.qos.logback.classic.LoggerContext; -import org.phenopackets.phenopackettools.Main; +import org.phenopackets.phenopackettools.cli.Main; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java similarity index 96% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java index a374be8b..6b5183a8 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import com.google.protobuf.Message; import org.phenopackets.phenopackettools.io.PhenopacketParser; @@ -57,8 +57,8 @@ protected BaseIOCommand() { /** * Attempt to read the input in the provided {@code 
schemaVersion} and exit upon any failure. As a side effect, - * {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#format} - * and {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element} + * {@link org.phenopackets.phenopackettools.cli.command.BaseIOCommand.InputSection#format} + * and {@link org.phenopackets.phenopackettools.cli.command.BaseIOCommand.InputSection#element} * fields are set after the function returns. *

* Note that the function does not return if reading fails. diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java index 74601ea7..2681098d 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ConvertCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import com.google.protobuf.Message; import org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java similarity index 97% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java index ef2d31c9..2685e4d9 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ExamplesCommand.java @@ -1,11 +1,11 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import com.google.protobuf.Message; import org.phenopackets.phenopackettools.core.PhenopacketFormat; import 
org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; import org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException; -import org.phenopackets.phenopackettools.examples.*; +import org.phenopackets.phenopackettools.cli.examples.*; import org.phenopackets.phenopackettools.io.PhenopacketPrinter; import org.phenopackets.phenopackettools.io.PhenopacketPrinterFactory; import picocli.CommandLine; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java similarity index 96% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java index 4f5ad8b8..f9e13cde 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import com.google.protobuf.MessageOrBuilder; @@ -11,7 +11,7 @@ import org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators; import org.phenopackets.phenopackettools.validator.core.writer.ValidationResultsAndPath; import org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner; -import org.phenopackets.phenopackettools.writer.CSVValidationResultsWriter; +import org.phenopackets.phenopackettools.cli.writer.CSVValidationResultsWriter; import org.phenopackets.schema.v2.CohortOrBuilder; import org.phenopackets.schema.v2.FamilyOrBuilder; import org.phenopackets.schema.v2.PhenopacketOrBuilder; @@ -128,9 +128,9 @@ private List prepareCustomSchemaUrls() { * Prepare semantic validators 
for given {@link T}. *

* Warning - it is important to request the {@link T} that is appropriate - * for the current {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element}. + * for the current {@link org.phenopackets.phenopackettools.cli.command.BaseIOCommand.InputSection#element}. * The app will crash and burn if e.g. {@link T} is {@link PhenopacketOrBuilder} - * while {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element} + * while {@link org.phenopackets.phenopackettools.cli.command.BaseIOCommand.InputSection#element} * is {@link PhenopacketElement#FAMILY}. */ private List> configureSemanticValidators() { diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/BaseValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/BaseValidateCommand.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/BaseValidateCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/BaseValidateCommand.java index 9cbb28a1..d4ecad43 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/BaseValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/BaseValidateCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command.validate; +package org.phenopackets.phenopackettools.cli.command.validate; import com.google.protobuf.MessageOrBuilder; import org.monarchinitiative.phenol.io.OntologyLoader; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateCohortCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java similarity index 95% rename from 
phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateCohortCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java index f04288da..cd9f5ece 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateCohortCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateCohortCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command.validate; +package org.phenopackets.phenopackettools.cli.command.validate; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateFamilyCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java similarity index 95% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateFamilyCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java index edc7cd65..ba0ce5ef 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidateFamilyCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidateFamilyCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command.validate; +package org.phenopackets.phenopackettools.cli.command.validate; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; diff --git 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidatePhenopacketCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java similarity index 96% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidatePhenopacketCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java index 65bce015..da2fcec8 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/validate/ValidatePhenopacketCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/validate/ValidatePhenopacketCommand.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.command.validate; +package org.phenopackets.phenopackettools.cli.command.validate; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java index c7909069..9cbeb206 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/AtaxiaWithVitaminEdeficiency.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/AtaxiaWithVitaminEdeficiency.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package 
org.phenopackets.phenopackettools.cli.examples; import org.ga4gh.vrsatile.v1.GeneDescriptor; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java similarity index 97% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java index 587434c0..e4683464 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/BethlehamMyopathy.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/BethlehamMyopathy.java @@ -1,8 +1,6 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; -import org.ga4gh.vrsatile.v1.GeneDescriptor; -import org.ga4gh.vrsatile.v1.VcfRecord; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; import org.phenopackets.phenopackettools.builder.constants.Status; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Covid.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Covid.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java index 39590dac..d107a384 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Covid.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Covid.java @@ -1,4 +1,4 @@ -package 
org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/DuchenneExon51Deletion.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/DuchenneExon51Deletion.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java index eb98ae33..4ef12c92 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/DuchenneExon51Deletion.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/DuchenneExon51Deletion.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/FamilyWithPedigree.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/FamilyWithPedigree.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java index b00d2ca8..9c23f284 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/FamilyWithPedigree.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/FamilyWithPedigree.java @@ -1,4 +1,4 @@ -package 
org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.FamilyBuilder; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Holoprosencephaly5.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Holoprosencephaly5.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java index 2b41fdf9..aeb86e0f 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Holoprosencephaly5.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Holoprosencephaly5.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.ga4gh.vrsatile.v1.GeneDescriptor; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Marfan.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java similarity index 97% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Marfan.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java index 728aa8f4..e401c7ed 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Marfan.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Marfan.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import 
org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/NemalineMyopathyPrenatal.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/NemalineMyopathyPrenatal.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java index 8e7471cc..eb7a00e6 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/NemalineMyopathyPrenatal.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/NemalineMyopathyPrenatal.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.ga4gh.vrs.v1.Variation; import org.ga4gh.vrsatile.v1.Expression; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PhenopacketExample.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PhenopacketExample.java similarity index 67% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PhenopacketExample.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PhenopacketExample.java index bef790d8..ac0c0934 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PhenopacketExample.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PhenopacketExample.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.schema.v2.Phenopacket; diff --git 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PneumothoraxSecondaryToCOVID.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PneumothoraxSecondaryToCOVID.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PneumothoraxSecondaryToCOVID.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PneumothoraxSecondaryToCOVID.java index c063181c..d24fbeb8 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/PneumothoraxSecondaryToCOVID.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/PneumothoraxSecondaryToCOVID.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Pseudoexfoliation.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Pseudoexfoliation.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java index ed544b26..b1036f4c 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Pseudoexfoliation.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Pseudoexfoliation.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.schema.v2.Phenopacket; diff --git 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Retinoblastoma.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Retinoblastoma.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java index c731cce2..69a46d8a 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/Retinoblastoma.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java index e5230a27..25255a24 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SevereStatinInducedAutoimmuneMyopathy.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SevereStatinInducedAutoimmuneMyopathy.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; 
import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SquamousCellCancer.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SquamousCellCancer.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SquamousCellCancer.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SquamousCellCancer.java index 1a205f55..83c94428 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/SquamousCellCancer.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/SquamousCellCancer.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/UrothelialCancer.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java similarity index 99% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/UrothelialCancer.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java index c8ed23b5..a5adac4a 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/UrothelialCancer.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/UrothelialCancer.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; diff --git 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/WarburgMicroSyndrome.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/WarburgMicroSyndrome.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/WarburgMicroSyndrome.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/WarburgMicroSyndrome.java index 43881115..5e5115f2 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples/WarburgMicroSyndrome.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/WarburgMicroSyndrome.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.examples; +package org.phenopackets.phenopackettools.cli.examples; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java similarity index 98% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java index 1040a24a..d408c35b 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java @@ -1,4 +1,4 @@ -package org.phenopackets.phenopackettools.writer; +package org.phenopackets.phenopackettools.cli.writer; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; diff --git 
a/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/application.properties b/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/cli/application.properties similarity index 100% rename from phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/application.properties rename to phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/cli/application.properties diff --git a/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/banner.txt b/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/cli/banner.txt similarity index 100% rename from phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/banner.txt rename to phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/cli/banner.txt diff --git a/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java b/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/cli/command/BaseCommandTest.java similarity index 78% rename from phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java rename to phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/cli/command/BaseCommandTest.java index 9074843a..ca329296 100644 --- a/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java +++ b/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/cli/command/BaseCommandTest.java @@ -1,11 +1,11 @@ -package org.phenopackets.phenopackettools.command; +package org.phenopackets.phenopackettools.cli.command; import org.junit.jupiter.api.Test; import static org.hamcrest.MatcherAssert.*; import static org.hamcrest.Matchers.*; -public class BasePTCommandTest { +public class BaseCommandTest { @Test public void markIsSupportedForStdin() { From cb6bc25332dedf9b462471c331826b44f9aa6a8d Mon Sep 17 00:00:00 2001 
From: Daniel Danis Date: Thu, 3 Nov 2022 09:54:34 -0400 Subject: [PATCH 48/79] Do not warn about unimplemented sniffing feature. Signed-off-by: Daniel Danis --- .../phenopackettools/cli/command/BaseIOCommand.java | 10 ++++++---- .../phenopackettools/util/format/ElementSniffer.java | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java index 6b5183a8..49b2c4eb 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/BaseIOCommand.java @@ -131,11 +131,13 @@ private void setFormatAndElement(InputStream is, PhenopacketSchemaVersion schema LOGGER.info("Input element type (-e | --element) was not provided, making an educated guess.."); LOGGER.info("The input looks like a {} ", element); inputSection.element = element; - } else { - if (!inputSection.element.equals(element)) - // Let's go an extra mile and check for the user. - LOGGER.warn("Input element is set to {} but the current input looks like a {}", inputSection.element, element); } +// else { + // TODO - enable once element sniffing is implemented +// if (!inputSection.element.equals(element)) +// Let's go an extra mile and check for the user. 
+// LOGGER.warn("Input element is set to {} but the current input looks like a {}", inputSection.element, element); +// } } protected record MessageAndPath(Message message, Path path) {} diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java index acad085e..84c33dcb 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/ElementSniffer.java @@ -65,7 +65,7 @@ public static PhenopacketElement sniff(byte[] payload, private static PhenopacketElement sniffProtobuf(byte[] payload, PhenopacketSchemaVersion schemaVersion) { // TODO - implement - LOGGER.warn("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); + LOGGER.debug("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); return PhenopacketElement.PHENOPACKET; } @@ -74,13 +74,13 @@ private static PhenopacketElement sniffJson(byte[] payload, PhenopacketSchemaVer // TODO - reconsider the sniffing workflow. In case of loosely defined formats like JSON and YAML, // the fields can be in any order and we may not get enough information. // Is it OK to throw upon sniffing failure or an Optional is enough? 
- LOGGER.warn("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); + LOGGER.debug("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); return PhenopacketElement.PHENOPACKET; } private static PhenopacketElement sniffYaml(byte[] payload, PhenopacketSchemaVersion schemaVersion) { // TODO - implement - LOGGER.warn("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); + LOGGER.debug("Sniffing is not yet implemented, assuming {}", PhenopacketElement.PHENOPACKET); return PhenopacketElement.PHENOPACKET; } } From 3976ac2037812e8440177d4eb76d465292a2b0be Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 11:03:24 -0400 Subject: [PATCH 49/79] Add comment to the generated constant classes to prevent manual edit. Signed-off-by: Daniel Danis --- constants/create_classes.py | 41 ++++++++----------- .../builder/constants/AllelicState.java | 1 + .../builder/constants/Assays.java | 1 + .../builder/constants/Gender.java | 1 + .../builder/constants/Laterality.java | 1 + .../builder/constants/MedicalActions.java | 1 + .../builder/constants/Onset.java | 1 + .../builder/constants/Organ.java | 1 + .../builder/constants/PathologicalTnm.java | 1 + .../builder/constants/Response.java | 1 + .../builder/constants/SpatialPattern.java | 1 + .../builder/constants/Unit.java | 1 + 12 files changed, 27 insertions(+), 25 deletions(-) diff --git a/constants/create_classes.py b/constants/create_classes.py index 52e19f8a..49e79251 100644 --- a/constants/create_classes.py +++ b/constants/create_classes.py @@ -75,31 +75,22 @@ def parse_csv(fname): def create_java_class(entry): java_file_name = entry.name + ".java" # LATER adjust path java_file_path = join(JAVA_DIR_PATH, java_file_name) - fh = open(java_file_path, 'wt') - fh.write("package org.phenopackets.phenopackettools.builder.constants;\n\n") - fh.write("import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder;\n") - fh.write("import 
org.phenopackets.schema.v2.core.OntologyClass;\n\n") - fh.write(f"public class {entry.name} {{\n\n") - items = entry.items - for item in items: - # e.g., private static final OntologyClass HETEROZYGOUS = OntologyClassBuilder.ontologyClass("GENO:0000135", "heterozygous"); - fh.write(f" private static final OntologyClass {item.variable_name} = OntologyClassBuilder.ontologyClass(") - fh.write(f"\"{item.ontology_id}\", \"{item.ontology_label}\");\n") - fh.write("\n\n") - for item in items: - # e.g., public static OntologyClass heterozygous() {return HETEROZYGOUS; } - fh.write(f" public static OntologyClass {item.function_name}() {{ return {item.variable_name}; }}\n") - fh.write("\n}\n") - fh.close() - - - - - - - - - + with open(java_file_path, 'wt') as fh: + fh.write("// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT!\n") + fh.write("package org.phenopackets.phenopackettools.builder.constants;\n\n") + fh.write("import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder;\n") + fh.write("import org.phenopackets.schema.v2.core.OntologyClass;\n\n") + fh.write(f"public class {entry.name} {{\n\n") + items = entry.items + for item in items: + # e.g., private static final OntologyClass HETEROZYGOUS = OntologyClassBuilder.ontologyClass("GENO:0000135", "heterozygous"); + fh.write(f" private static final OntologyClass {item.variable_name} = OntologyClassBuilder.ontologyClass(") + fh.write(f"\"{item.ontology_id}\", \"{item.ontology_label}\");\n") + fh.write("\n\n") + for item in items: + # e.g., public static OntologyClass heterozygous() {return HETEROZYGOUS; } + fh.write(f" public static OntologyClass {item.function_name}() {{ return {item.variable_name}; }}\n") + fh.write("\n}\n") entries = [] diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AllelicState.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AllelicState.java index 
1f5882bf..4258c054 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AllelicState.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AllelicState.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Assays.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Assays.java index da0f8d66..66f73070 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Assays.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Assays.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java index d3174f70..1def073d 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Laterality.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Laterality.java index 59c993e1..447637ef 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Laterality.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Laterality.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MedicalActions.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MedicalActions.java index 6e9da378..0324e45a 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MedicalActions.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MedicalActions.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Onset.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Onset.java index e15d4ed8..5dcbfd7e 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Onset.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Onset.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Organ.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Organ.java index e888c262..a2ecc900 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Organ.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Organ.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java index fa9b9d37..d6954e45 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/PathologicalTnm.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java index 71e7a47e..7f4cd19b 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Response.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/SpatialPattern.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/SpatialPattern.java index 8be52728..b83f9f82 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/SpatialPattern.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/SpatialPattern.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Unit.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Unit.java index 090c5ae1..45515c9c 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Unit.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Unit.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; From 79ac3363811076962a33858346b3908707925eef Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Thu, 3 Nov 2022 14:23:51 -0400 Subject: [PATCH 50/79] adding organ system constants --- .../core/phenotype/HpoOrganSystems.java | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java new file mode 100644 index 00000000..5ee91527 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java @@ -0,0 +1,48 @@ +package org.phenopackets.phenopackettools.validator.core.phenotype; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +/** + * This class contains constants that correspond to the upper-level HPO organ-system phenotypic abnormalities. + * They can be used together with the {@link org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.OrganSystem} + * validators, which enforce that a phenopacket contains at least one term from a set of organ systems (observed or excluded). + * Note that users can also use any HPO term in this way -- the validator will enforce that the phenopacket has an HPO term that descends from it, + * but the most common use cases are these organ-level terms + *

<pre>{@code
+ * Ontology hpo = ...;
+ * var requiredOrganSystems = Set.of(BLOOD, CARDIOVASCULAR, SKELETAL);
+ * var validator = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(hpo, requiredOrganSystems);
+ * }
+ * </pre>
+ */ +public class HpoOrganSystems { + + public static final TermId ABNORMAL_CELLULAR = TermId.of("HP:0025354"); + public static final TermId BLOOD = TermId.of("HP:0001871"); + public static final TermId CONNECTIVE_TISSUE = TermId.of("HP:0003549"); + public static final TermId HEAD_AND_NECK = TermId.of("HP:0000152"); + public static final TermId LIMBS = TermId.of("HP:0040064"); + public static final TermId METABOLISM = TermId.of("HP:0001939"); + public static final TermId PRENATAL = TermId.of("HP:0001197"); + public static final TermId BREAST = TermId.of("HP:0000769"); + public static final TermId CARDIOVASCULAR = TermId.of("HP:0001626"); + public static final TermId DIGESTIVE = TermId.of("HP:0025031"); + public static final TermId EAR = TermId.of("HP:0000598"); + public static final TermId ENDOCRINE = TermId.of("HP:0000818"); + public static final TermId EYE = TermId.of("HP:0000478"); + public static final TermId GENITOURINARY = TermId.of("HP:0000119"); + public static final TermId IMMUNOLOGY = TermId.of("HP:0002715"); + public static final TermId INTEGUMENT = TermId.of("HP:0001574"); + public static final TermId MUSCLE = TermId.of("HP:0003011"); + public static final TermId NERVOUS_SYSTEM = TermId.of("HP:0000707"); + public static final TermId RESPIRATORY = TermId.of("HP:0002086"); + public static final TermId SKELETAL = TermId.of("HP:0000924"); + public static final TermId THORACIC_CAVITY = TermId.of("HP:0045027"); + public static final TermId VOICE = TermId.of("HP:0001608"); + public static final TermId CONSTITUTIONAL = TermId.of("HP:0025142"); + public static final TermId GROWTH = TermId.of("HP:0001507"); + public static final TermId NEOPLASM = TermId.of("HP:0002664"); + + private HpoOrganSystems() { + } +} From f1ec516bfb658b9110d4489fce0dedaee3c6eb8e Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 15:02:48 -0400 Subject: [PATCH 51/79] Setup distribution process. 
Signed-off-by: Daniel Danis --- CHANGELOG.rst | 21 +++++++++++ README.md | 3 +- phenopacket-tools-cli/pom.xml | 18 +++++++++ .../src/assemble/distribution.xml | 37 +++++++++++++++++++ .../src/examples/semantic.json | 3 ++ .../src/examples/syntax.json | 3 ++ 6 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 CHANGELOG.rst create mode 100644 phenopacket-tools-cli/src/assemble/distribution.xml create mode 100644 phenopacket-tools-cli/src/examples/semantic.json create mode 100644 phenopacket-tools-cli/src/examples/syntax.json diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 00000000..ddee17d4 --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,21 @@ +========= +Changelog +========= + +latest +------ + +* Add I/O module, implement YAML parser and printer +* Add organ system validator +* Update generated code for the constants +* Let the user choose the CLI verbosity + +v0.4.6 +------ + +* Validate metadata +* Convert v1 variants +* Extend phenopacket examples +* Update documentation +* Improve CLI and extend CLI documentation + diff --git a/README.md b/README.md index d76c0f43..7dabba6f 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,7 @@ The cli application works in a standard UNIX-like manner. 
```shell cd phenopacket-tools ./mvnw package -PXF_VERSION="0.4.7-SNAPSHOT" -alias pfx-tools="java -jar $(pwd)/phenopacket-tools-cli/target/phenopacket-tools-cli-${PXF_VERSION}.jar" +alias pfx-tools="java -jar $(pwd)/phenopacket-tools-cli/target/phenopacket-tools-cli-@project.version@.jar" pfx-tools --help ``` diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index 8c23d0ce..fc2f04d6 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -75,6 +75,24 @@ org.springframework.boot spring-boot-maven-plugin + + org.apache.maven.plugins + maven-assembly-plugin + + + src/assemble/distribution.xml + + + + + make-assembly + package + + single + + + + diff --git a/phenopacket-tools-cli/src/assemble/distribution.xml b/phenopacket-tools-cli/src/assemble/distribution.xml new file mode 100644 index 00000000..3d14eee3 --- /dev/null +++ b/phenopacket-tools-cli/src/assemble/distribution.xml @@ -0,0 +1,37 @@ + + distribution + + zip + + + + + ${project.parent.basedir} + ./ + true + + README.md + LICENSE + CHANGELOG.rst + + + + + ${project.build.directory} + ./ + + phenopacket-tools-cli-${version}.jar + + + + + ${project.basedir}/src/examples + ./examples + + *.json + + + + \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/semantic.json b/phenopacket-tools-cli/src/examples/semantic.json new file mode 100644 index 00000000..0e0dcd23 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/semantic.json @@ -0,0 +1,3 @@ +{ + +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/syntax.json b/phenopacket-tools-cli/src/examples/syntax.json new file mode 100644 index 00000000..0e0dcd23 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/syntax.json @@ -0,0 +1,3 @@ +{ + +} \ No newline at end of file From ef7ec368a8da4324517f54a7188073b32ad3bf38 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 15:07:54 -0400 Subject: [PATCH 52/79] Update the documentation. 
Signed-off-by: Daniel Danis --- docs/cli.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index f60a64b2..898cb137 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -17,15 +17,18 @@ no special installation procedure if Java 17 or better is available in your envi Setup ~~~~~ -Most users should *download* the precompiled JAR file from *phenopacket-tools* release page. +Most users should *download* the distribution ZIP file with the precompiled JAR file from the *phenopacket-tools* release page. However, it is also possible to *build* the JAR from sources. Download ^^^^^^^^ -*phenopacket-tools* JAR is provided as part of *phenopacket-tools*' release schedule +The *phenopacket-tools* JAR is provided in the distribution ZIP file as part of *phenopacket-tools*' release schedule from `Releases `_. +The ZIP archive contains the executable JAR file along with the README and the example phenopackets required to run the setup +and the tutorial. + Build from source code ^^^^^^^^^^^^^^^^^^^^^^ @@ -44,9 +47,9 @@ Run the following commands to check out the stable source code and to build the $ cd phenopacket-tools $ ./mvnw -Prelease package -After a successful build, a file ``phenopacket-tools-cli-${project.version}.jar`` will be created in -the ``phenopacket-tools-cli/target`` directory. Use the JAR file in the same way as the JAR downloaded -from *phenopacket-tools* releases. +After a successful build, a distribution ZIP file ``phenopacket-tools-cli-${project.version}-distribution.zip`` +will be created in the ``phenopacket-tools-cli/target`` directory. Use the ZIP archive in the same way as the archive +downloaded from *phenopacket-tools* releases. .. note:: Replace ``${project.version}`` with a given version (e.g. ``0.4.6``). From 58576fa04c7c1e40770309c3765eef17c3796f58 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 15:10:13 -0400 Subject: [PATCH 53/79] Add example JSON schema for HPO rare disease validation.
Signed-off-by: Daniel Danis --- .../src/assemble/distribution.xml | 5 ++- .../examples/{ => phenopackets}/semantic.json | 0 .../examples/{ => phenopackets}/syntax.json | 0 .../schemas/hpo-rare-disease-schema.json | 41 +++++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) rename phenopacket-tools-cli/src/examples/{ => phenopackets}/semantic.json (100%) rename phenopacket-tools-cli/src/examples/{ => phenopackets}/syntax.json (100%) create mode 100644 phenopacket-tools-cli/src/examples/schemas/hpo-rare-disease-schema.json diff --git a/phenopacket-tools-cli/src/assemble/distribution.xml b/phenopacket-tools-cli/src/assemble/distribution.xml index 3d14eee3..47440080 100644 --- a/phenopacket-tools-cli/src/assemble/distribution.xml +++ b/phenopacket-tools-cli/src/assemble/distribution.xml @@ -25,12 +25,13 @@ phenopacket-tools-cli-${version}.jar - + ${project.basedir}/src/examples ./examples - *.json + phenopackets/* + schemas/* diff --git a/phenopacket-tools-cli/src/examples/semantic.json b/phenopacket-tools-cli/src/examples/phenopackets/semantic.json similarity index 100% rename from phenopacket-tools-cli/src/examples/semantic.json rename to phenopacket-tools-cli/src/examples/phenopackets/semantic.json diff --git a/phenopacket-tools-cli/src/examples/syntax.json b/phenopacket-tools-cli/src/examples/phenopackets/syntax.json similarity index 100% rename from phenopacket-tools-cli/src/examples/syntax.json rename to phenopacket-tools-cli/src/examples/phenopackets/syntax.json diff --git a/phenopacket-tools-cli/src/examples/schemas/hpo-rare-disease-schema.json b/phenopacket-tools-cli/src/examples/schemas/hpo-rare-disease-schema.json new file mode 100644 index 00000000..d56a28fd --- /dev/null +++ b/phenopacket-tools-cli/src/examples/schemas/hpo-rare-disease-schema.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema#", + "$id": "example.hpo.jsonschema.validator", + "title": "HPO Rare Disease Phenopacket Schema", + "description": "HPO 
Rare Disease Schema for GA4GH Phenopacket", + "_comment": "Here we require the phenopacket to have the following elements that are not required by the default schema: 1. subject (proband being investigated), 2. at least one phenotypicFeature element, 3. timeAtLastEncounter (subelement of subject), representing the age of the proband. In addition, we require that Human Phenotype Ontology (HPO) terms are used to represent each phenotypicFeature", + "type": "object", + "properties": { + "subject": { + "type": "object", + "description": "The subject element is required for a rare-disease Phenopacket", + "properties": { + "timeAtLastEncounter": { + "type": "object", + "description": "The time at last encounter is required for a rare-disease phenopacket" + } + }, + "required": [ + "timeAtLastEncounter" + ] + }, + "phenotypicFeatures": { + "type": "array", + "items": [ + { + "type": "object", + "properties": { + "id": { + "type": "string", + "pattern": "^HP:[0-9]{7}$" + } + } + } + ] + } + }, + "required": [ + "subject", + "phenotypicFeatures" + ] +} \ No newline at end of file From eaebfad97b7e248da9e656c452126cf5a57c49d3 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 15:12:08 -0400 Subject: [PATCH 54/79] Add README into `supplementary` folder. Signed-off-by: Daniel Danis --- supplementary/README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 supplementary/README.md diff --git a/supplementary/README.md b/supplementary/README.md new file mode 100644 index 00000000..afe140ed --- /dev/null +++ b/supplementary/README.md @@ -0,0 +1,4 @@ +# README + +Consider deleting the `supplementary` folder since another copy of the HPO rare disease schema is bundled +into the distribution ZIP.
From 4ff0c82f0bdb61818de9be7f780211c79f65baeb Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Thu, 3 Nov 2022 15:53:54 -0400 Subject: [PATCH 55/79] adding TreatmentTermination --- constants/Assays.tsv | 2 -- constants/TreatmentTermination.tsv | 5 ++++ constants/rtd_texts.txt | 3 ++- docs/constants.rst | 27 ++++++++++--------- .../constants/AdministrationRoute.java | 1 + .../builder/constants/BiospecimenType.java | 1 + .../builder/constants/DiseaseGrade.java | 1 + .../builder/constants/DiseaseStage.java | 1 + .../builder/constants/Evidence.java | 1 + .../builder/constants/MaterialSample.java | 1 + .../builder/constants/Severity.java | 1 + .../constants/TreatmentTermination.java | 20 ++++++++++++++ .../builder/constants/TumorProgression.java | 1 + 13 files changed, 50 insertions(+), 15 deletions(-) delete mode 100644 constants/Assays.tsv create mode 100644 constants/TreatmentTermination.tsv create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TreatmentTermination.java diff --git a/constants/Assays.tsv b/constants/Assays.tsv deleted file mode 100644 index c0bc6d28..00000000 --- a/constants/Assays.tsv +++ /dev/null @@ -1,2 +0,0 @@ -ontology.id ontology.label variable.name function.name -LOINC:2157-6 Creatine kinase [Enzymatic activity/volume] in Serum or Plasma CREATINE_KINASE creatineKinaseActivity diff --git a/constants/TreatmentTermination.tsv b/constants/TreatmentTermination.tsv new file mode 100644 index 00000000..d64c1a6e --- /dev/null +++ b/constants/TreatmentTermination.tsv @@ -0,0 +1,5 @@ +ontology.id ontology.label variable.name function.name +NCIT:C105740 Treatment Completed as Prescribed TREATMENT_COMPLETED_AS_PRESCRIBED treatmentCompletedAsPrescribed +NCIT:C105741 Treatment Terminated Due to Toxicity TREATMENT_TERMINATED_TOXICITY treatmentTerminatedDueToToxicity +NCIT:C106470 Treatment on Hold TREATMENT_ON_HOLD treatmentOnHold +NCIT:C41331 Adverse Event ADVERSE_EVENT adverseEvent diff --git 
a/constants/rtd_texts.txt b/constants/rtd_texts.txt index ad36e1c0..9546fb50 100644 --- a/constants/rtd_texts.txt +++ b/constants/rtd_texts.txt @@ -19,4 +19,5 @@ TumorProgression|Terms from the `NCI Thesaurus `_ to describe microscopic appearance of tumor. Grade 1: Well differentiated (low grade); Grade 2: Moderately differentiated (intermediate grade); Grade 3: Poorly differentiated (high grade); Grade 4: Undifferentiated (high grade). DiseaseGrade|Terms from the `NCI Thesaurus `_ to represent the tumor grade. MaterialSample|Terms from the `EFO `_ to specify the status of the sample. -AdministrationRoute|Terms from the `NCI Thesaurus `_ to represent the way in which a medicinal product is introduced into the body. \ No newline at end of file +AdministrationRoute|Terms from the `NCI Thesaurus `_ to represent the way in which a medicinal product is introduced into the body. +TreatmentTermination|Terms from the `NCI Thesaurus `_ to represent the reason that the treatment was completed or stopped early. \ No newline at end of file diff --git a/docs/constants.rst b/docs/constants.rst index a97ac851..f2b11cce 100644 --- a/docs/constants.rst +++ b/docs/constants.rst @@ -59,18 +59,6 @@ Terms from the `GENE ontology `_ are "GENO:0000134", "hemizygous", "hemizygous()" -Assays -^^^^^^ - -If possible, `LOINC `_ codes should be used to specify laboratory test assays. - -.. csv-table:: - :header: "id", "label", "function name" - :widths: 30, 200, 200 - - "LOINC:2157-6", "Creatine kinase [Enzymatic activity/volume] in Serum or Plasma", "creatineKinaseActivity()" - - BiospecimenType ^^^^^^^^^^^^^^^ @@ -397,6 +385,21 @@ Modifier terms from the `HPO `_ are used to describe s "HP:0032540", "Joint flexor surface localization", "jointFlexorSurfaceLocalization()" +TreatmentTermination +^^^^^^^^^^^^^^^^^^^^ + +Terms from the `NCI Thesaurus `_ to represent the reason that the treatment was completed or stopped early. + +.. 
csv-table:: + :header: "id", "label", "function name" + :widths: 30, 200, 200 + + "NCIT:C105740", "Treatment Completed as Prescribed", "treatmentCompletedAsPrescribed()" + "NCIT:C105741", "Treatment Terminated Due to Toxicity", "treatmentTerminatedDueToToxicity()" + "NCIT:C106470", "Treatment on Hold", "treatmentOnHold()" + "NCIT:C41331", "Adverse Event", "adverseEvent()" + + TumorProgression ^^^^^^^^^^^^^^^^ diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java index 366c496c..bf0849fb 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/AdministrationRoute.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java index dfcdf261..75bd27b5 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/BiospecimenType.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java index d5baa9d5..aa15befb 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseGrade.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java index 36a6abb2..15c1551f 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/DiseaseStage.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java index 172e2420..06f5cf6b 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Evidence.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java index cf5683d4..024190e0 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/MaterialSample.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java index 276120a5..50566eed 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Severity.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TreatmentTermination.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TreatmentTermination.java new file mode 100644 index 00000000..ff792267 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TreatmentTermination.java @@ -0,0 +1,20 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
+package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class TreatmentTermination { + + private static final OntologyClass TREATMENT_COMPLETED_AS_PRESCRIBED = OntologyClassBuilder.ontologyClass("NCIT:C105740", "Treatment Completed as Prescribed"); + private static final OntologyClass TREATMENT_TERMINATED_TOXICITY = OntologyClassBuilder.ontologyClass("NCIT:C105741", "Treatment Terminated Due to Toxicity"); + private static final OntologyClass TREATMENT_ON_HOLD = OntologyClassBuilder.ontologyClass("NCIT:C106470", "Treatment on Hold"); + private static final OntologyClass ADVERSE_EVENT = OntologyClassBuilder.ontologyClass("NCIT:C41331", "Adverse Event"); + + + public static OntologyClass treatmentCompletedAsPrescribed() { return TREATMENT_COMPLETED_AS_PRESCRIBED; } + public static OntologyClass treatmentTerminatedDueToToxicity() { return TREATMENT_TERMINATED_TOXICITY; } + public static OntologyClass treatmentOnHold() { return TREATMENT_ON_HOLD; } + public static OntologyClass adverseEvent() { return ADVERSE_EVENT; } + +} diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java index 373d37d3..4acdbbcf 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/TumorProgression.java @@ -1,3 +1,4 @@ +// Generated by phenopacket-tools/constants/create_classes.py. DO NOT EDIT! 
package org.phenopackets.phenopackettools.builder.constants; import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; From 8acf56b2d4ea72716aafcc99fa0964df7a1a999f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 18:27:05 -0400 Subject: [PATCH 56/79] Add UCUM, LOINC and DrugCentral builders into `Resources`. Signed-off-by: Daniel Danis --- .../builder/builders/Resources.java | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java index 2747981d..ff5c3d0e 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java @@ -91,6 +91,27 @@ private Resources() { .setUrl("https://www.genenames.org") .setIriPrefix("https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/"); + private static final Resource.Builder UCUM_BUILDER = Resource.newBuilder() + .setId("ucum") + .setName("Unified Code for Units of Measure") + .setNamespacePrefix("UCUM") + .setUrl("https://ucum.org") + .setIriPrefix("https://ucum.org/"); // TODO - we need to get a real prefix + + private static final Resource.Builder LOINC_BUILDER = Resource.newBuilder() + .setId("loinc") + .setName("Logical Observation Identifiers Names and Codes") + .setNamespacePrefix("LOINC") + .setUrl("https://loinc.org") + .setIriPrefix("https://loinc.org/"); + + private static final Resource.Builder DRUG_CENTRAL_BUILDER = Resource.newBuilder() + .setId("drugcentral") + .setName("Drug Central") + .setNamespacePrefix("DrugCentral") + .setUrl("https://drugcentral.org/") + .setIriPrefix("https://drugcentral.org/drugcard/"); + public static Resource hgncVersion(String version) { return 
HGNC_BUILDER.setVersion(version).build(); } public static Resource hpoVersion(String version) { @@ -136,4 +157,16 @@ public static Resource soVersion(String version) { public static Resource uoVersion(String version) { return UO_BUILDER.setVersion(version).build(); } + + public static Resource ucumVersion(String version) { + return UCUM_BUILDER.setVersion(version).build(); + } + + public static Resource loincVersion(String version) { + return LOINC_BUILDER.setVersion(version).build(); + } + + public static Resource drugCentralVersion(String version) { + return DRUG_CENTRAL_BUILDER.setVersion(version).build(); + } } From fc985ddee863b0ea8d268642d4be69d76674536c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 18:27:56 -0400 Subject: [PATCH 57/79] Finalize names and descriptions of the base validators. Signed-off-by: Daniel Danis --- .../validator/core/ValidatorInfo.java | 14 ++------------ .../validator/core/ValidatorInfoDefault.java | 6 ++---- .../core/metadata/BaseMetaDataValidator.java | 4 ++-- .../v2/JsonSchemaValidatorConfigurer.java | 6 +++--- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java index f0d199d1..99fd4a8b 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java @@ -5,18 +5,8 @@ */ public interface ValidatorInfo { - static ValidatorInfo genericJsonSchema() { - return ValidatorInfoDefault.GENERIC; - } - - /** - * This class implements additional validation of a phenopacket that is intended to be used - * for HPO rare disease phenotyping. 
By assumption, the phenopacket will have been first - * checked against the {@link ValidatorInfo#genericJsonSchema()} specification. This class performs validation with the - * file {@code hpo-rare-disease-schema.json}. - */ - static ValidatorInfo rareDiseaseValidation() { - return ValidatorInfoDefault.RARE_DISEASE_VALIDATOR; + static ValidatorInfo baseSyntaxValidation() { + return ValidatorInfoDefault.BASE; } /** diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java index 5ec46e0c..053c5a61 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java @@ -3,9 +3,7 @@ record ValidatorInfoDefault(String validatorId, String validatorName, String description) implements ValidatorInfo { - // TODO - add descriptions - static final ValidatorInfoDefault GENERIC = new ValidatorInfoDefault("GENERIC", "Validation of a generic Phenopacket", ""); - static final ValidatorInfoDefault RARE_DISEASE_VALIDATOR = new ValidatorInfoDefault("RARE_DISEASE_VALIDATOR", "Validation of rare disease Phenopacket constraints", ""); - static final ValidatorInfoDefault INPUT_VALIDATOR = new ValidatorInfoDefault("Input", "Input of phenopacket data", "Validation of data format"); + static final ValidatorInfoDefault BASE = new ValidatorInfoDefault("Base", "Base syntax validator", "The base syntax validation of a phenopacket, family, or cohort"); + static final ValidatorInfoDefault INPUT_VALIDATOR = new ValidatorInfoDefault("Input", "Data format validator", "The validator for checking data format issues (e.g. 
presence of a required field in JSON document)"); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java index 4aee2f6d..8a053cec 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/metadata/BaseMetaDataValidator.java @@ -18,8 +18,8 @@ abstract class BaseMetaDataValidator implements Phen private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( "MetaDataValidator", - "MetaDataValidator for Phenopacket, Family, and Cohort", - "Validate that the MetaData section includes information about all ontologies used"); + "MetaData validator", + "Validate that the MetaData section describes all used ontologies"); @Override public ValidatorInfo validatorInfo() { diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java index e75f7f1e..a1f866d6 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java @@ -113,7 +113,7 @@ private static JsonNode readJsonSchemaNode(InputStream is) throws IOException { private static JsonSchemaNodeAndInfo phenopacketJsonSchemaAndInfo() { try (InputStream is = 
JsonSchemaValidatorConfigurer.class.getResourceAsStream(PHENOPACKET_SCHEMA_PATH)) { JsonNode schemaNode = readJsonSchemaNode(is); - return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.genericJsonSchema()); + return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.baseSyntaxValidation()); } catch (IOException e) { throw new PhenopacketValidatorRuntimeException("Invalid JSON schema specification: " + e.getMessage()); } @@ -125,7 +125,7 @@ private static JsonSchemaNodeAndInfo phenopacketJsonSchemaAndInfo() { private static JsonSchemaNodeAndInfo familyJsonSchemaAndInfo() { try (InputStream is = JsonSchemaValidatorConfigurer.class.getResourceAsStream(FAMILY_SCHEMA_PATH)) { JsonNode schemaNode = readJsonSchemaNode(is); - return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.genericJsonSchema()); + return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.baseSyntaxValidation()); } catch (IOException e) { throw new PhenopacketValidatorRuntimeException("Invalid JSON schema specification: " + e.getMessage()); } @@ -137,7 +137,7 @@ private static JsonSchemaNodeAndInfo familyJsonSchemaAndInfo() { private static JsonSchemaNodeAndInfo cohortJsonSchemaAndInfo() { try (InputStream is = JsonSchemaValidatorConfigurer.class.getResourceAsStream(COHORT_SCHEMA_PATH)) { JsonNode schemaNode = readJsonSchemaNode(is); - return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.genericJsonSchema()); + return new JsonSchemaNodeAndInfo(schemaNode, ValidatorInfo.baseSyntaxValidation()); } catch (IOException e) { throw new PhenopacketValidatorRuntimeException("Invalid JSON schema specification: " + e.getMessage()); } From ee365d33820902d4dd7b6dac35830bd0651bdd6b Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 3 Nov 2022 18:29:29 -0400 Subject: [PATCH 58/79] Prettify base validator messages. 
Signed-off-by: Daniel Danis --- .../jsonschema/impl/JsonSchemaValidator.java | 28 +++- ...sonSchemaValidationWorkflowRunnerTest.java | 134 +++++++++--------- .../impl/JsonSchemaDiseaseValidatorTest.java | 2 +- .../impl/JsonSchemaValidatorTest.java | 10 +- 4 files changed, 98 insertions(+), 76 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidator.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidator.java index b883b534..9811498c 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidator.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidator.java @@ -2,12 +2,18 @@ import com.fasterxml.jackson.databind.JsonNode; import com.networknt.schema.JsonSchema; +import com.networknt.schema.ValidationMessage; import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.List; import java.util.Objects; -import java.util.stream.Collectors; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Stream; /** * {@link JsonSchemaValidator} applies a single {@link JsonSchema} @@ -16,6 +22,9 @@ */ public class JsonSchemaValidator { + private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaValidator.class); + private static final Pattern VALIDATION_MSG_PT = Pattern.compile("^\\$\\.(?[\\w\\[\\].]+):(?.*)$"); + private final JsonSchema jsonSchema; private final ValidatorInfo validatorInfo; @@ -30,8 +39,21 @@ public ValidatorInfo validatorInfo() { public List validate(JsonNode node) { 
return jsonSchema.validate(node).stream() - .map(res -> ValidationResult.error(validatorInfo, res.getType(), res.getMessage())) - .collect(Collectors.toList()); + .flatMap(validationMessageIntoValidationResult()) + .toList(); + } + + private Function> validationMessageIntoValidationResult() { + return res -> { + Matcher matcher = VALIDATION_MSG_PT.matcher(res.getMessage()); + if (matcher.matches()) { + String msg = "'%s'%s".formatted(matcher.group("location"), matcher.group("msg")); + return Stream.of(ValidationResult.error(validatorInfo, res.getType(), msg)); + } else { + LOGGER.warn("Non-matching validation message: {}", res.getMessage()); + return Stream.empty(); + } + }; } } diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index cf885756..9a11f53b 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -53,8 +53,8 @@ public void setUp() { */ @ParameterizedTest @CsvSource({ - "/id, DELETE, '$.id: is missing but it is required'", - "/metaData, DELETE, '$.metaData: is missing but it is required'", + "/id, DELETE, 'id' is missing but it is required", + "/metaData, DELETE, 'metaData' is missing but it is required", }) public void checkTopLevelPhenopacketConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -65,7 +65,7 @@ public void checkTopLevelPhenopacketConstraints(String path, String action, Stri */ @ParameterizedTest @CsvSource({ - "/subject/id, 
DELETE, '$.subject.id: is missing but it is required'" + "/subject/id, DELETE, 'subject.id' is missing but it is required" }) public void checkSubjectConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -77,7 +77,7 @@ public void checkSubjectConstraints(String path, String action, String expected) */ @ParameterizedTest @CsvSource({ - "/subject/vitalStatus/status, DELETE, '$.subject.vitalStatus.status: is missing but it is required'" + "/subject/vitalStatus/status, DELETE, 'subject.vitalStatus.status' is missing but it is required" }) public void checkVitalStatusConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -88,8 +88,8 @@ public void checkVitalStatusConstraints(String path, String action, String expec */ @ParameterizedTest @CsvSource({ - "/phenotypicFeatures[0]/type, DELETE, '$.phenotypicFeatures[0].type: is missing but it is required'", - "/phenotypicFeatures[1]/type, DELETE, '$.phenotypicFeatures[1].type: is missing but it is required'" + "/phenotypicFeatures[0]/type, DELETE, 'phenotypicFeatures[0].type' is missing but it is required", + "/phenotypicFeatures[1]/type, DELETE, 'phenotypicFeatures[1].type' is missing but it is required" }) public void checkPhenotypicFeatureConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -103,17 +103,17 @@ public void checkPhenotypicFeatureConstraints(String path, String action, String @CsvSource({ // TODO - this returns an error for each oneOf field // "/phenotypicFeatures[0]/onset/gestationalAge, DELETE, '$.phenotypicFeatures[0].onset.gestationalAge.weeks: is missing but it is required'", - "/phenotypicFeatures[0]/onset/gestationalAge/weeks, DELETE, '$.phenotypicFeatures[0].onset.gestationalAge.weeks: is missing but it is required'", - 
"/phenotypicFeatures[0]/onset/gestationalAge/weeks, SET[-1], '$.phenotypicFeatures[0].onset.gestationalAge.weeks: must have a minimum value of 0'", - "/phenotypicFeatures[0]/onset/gestationalAge/days, SET[-1], '$.phenotypicFeatures[0].onset.gestationalAge.days: must have a minimum value of 0'", - "/phenotypicFeatures[1]/onset/age/iso8601duration, DELETE, '$.phenotypicFeatures[1].onset.age.iso8601duration: is missing but it is required'", + "/phenotypicFeatures[0]/onset/gestationalAge/weeks, DELETE, 'phenotypicFeatures[0].onset.gestationalAge.weeks' is missing but it is required", + "/phenotypicFeatures[0]/onset/gestationalAge/weeks, SET[-1], 'phenotypicFeatures[0].onset.gestationalAge.weeks' must have a minimum value of 0", + "/phenotypicFeatures[0]/onset/gestationalAge/days, SET[-1], 'phenotypicFeatures[0].onset.gestationalAge.days' must have a minimum value of 0", + "/phenotypicFeatures[1]/onset/age/iso8601duration, DELETE, 'phenotypicFeatures[1].onset.age.iso8601duration' is missing but it is required", // TODO - add test for ensuring that the duration is in an ISO8601 pattern - "/phenotypicFeatures[2]/onset/ageRange/start, DELETE, '$.phenotypicFeatures[2].onset.ageRange.start: is missing but it is required'", - "/phenotypicFeatures[2]/onset/ageRange/end, DELETE, '$.phenotypicFeatures[2].onset.ageRange.end: is missing but it is required'", + "/phenotypicFeatures[2]/onset/ageRange/start, DELETE, 'phenotypicFeatures[2].onset.ageRange.start' is missing but it is required", + "/phenotypicFeatures[2]/onset/ageRange/end, DELETE, 'phenotypicFeatures[2].onset.ageRange.end' is missing but it is required", // TODO - require end being at or after start // We do not tamper with the ontology class and timestamp as we test their validity elsewhere. 
- "/phenotypicFeatures[5]/onset/interval/start, DELETE, '$.phenotypicFeatures[5].onset.interval.start: is missing but it is required'", - "/phenotypicFeatures[5]/onset/interval/end, DELETE, '$.phenotypicFeatures[5].onset.interval.end: is missing but it is required'", + "/phenotypicFeatures[5]/onset/interval/start, DELETE, 'phenotypicFeatures[5].onset.interval.start' is missing but it is required", + "/phenotypicFeatures[5]/onset/interval/end, DELETE, 'phenotypicFeatures[5].onset.interval.end' is missing but it is required", }) public void checkTimeElementConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -124,7 +124,7 @@ public void checkTimeElementConstraints(String path, String action, String expec */ @ParameterizedTest @CsvSource({ - "/phenotypicFeatures[0]/evidence[0]/evidenceCode, DELETE, '$.phenotypicFeatures[0].evidence[0].evidenceCode: is missing but it is required'", + "/phenotypicFeatures[0]/evidence[0]/evidenceCode, DELETE, 'phenotypicFeatures[0].evidence[0].evidenceCode' is missing but it is required", }) public void checkEvidenceConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -136,9 +136,9 @@ public void checkEvidenceConstraints(String path, String action, String expected */ @ParameterizedTest @CsvSource({ - "/measurements[0]/assay, DELETE, '$.measurements[0].assay: is missing but it is required'", - "/measurements[0]/value, DELETE, '$.measurements[0].value: is missing but it is required|$.measurements[0].complexValue: is missing but it is required'", - "/measurements[1]/complexValue, DELETE, '$.measurements[1].value: is missing but it is required|$.measurements[1].complexValue: is missing but it is required'", + "/measurements[0]/assay, DELETE, 'measurements[0].assay' is missing but it is required", + "/measurements[0]/value, DELETE, 'measurements[0].value' is missing but it is 
required|'measurements[0].complexValue' is missing but it is required", + "/measurements[1]/complexValue, DELETE, 'measurements[1].value' is missing but it is required|'measurements[1].complexValue' is missing but it is required", }) public void checkMeasurementConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -149,7 +149,7 @@ public void checkMeasurementConstraints(String path, String action, String expec */ @ParameterizedTest @CsvSource({ - "/biosamples[0]/id, DELETE, '$.biosamples[0].id: is missing but it is required'", + "/biosamples[0]/id, DELETE, 'biosamples[0].id' is missing but it is required", }) public void checkBiosampleConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -161,8 +161,8 @@ public void checkBiosampleConstraints(String path, String action, String expecte */ @ParameterizedTest @CsvSource({ - "/interpretations[0]/id, DELETE, '$.interpretations[0].id: is missing but it is required'", - "/interpretations[0]/progressStatus, DELETE, '$.interpretations[0].progressStatus: is missing but it is required'", + "/interpretations[0]/id, DELETE, 'interpretations[0].id' is missing but it is required", + "/interpretations[0]/progressStatus, DELETE, 'interpretations[0].progressStatus' is missing but it is required", }) public void checkInterpretationConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -173,7 +173,7 @@ public void checkInterpretationConstraints(String path, String action, String ex */ @ParameterizedTest @CsvSource({ - "/interpretations[0]/diagnosis/disease, DELETE, '$.interpretations[0].diagnosis.disease: is missing but it is required'", + "/interpretations[0]/diagnosis/disease, DELETE, 'interpretations[0].diagnosis.disease' is missing but it is required", }) public void 
checkDiagnosisConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -185,12 +185,12 @@ public void checkDiagnosisConstraints(String path, String action, String expecte */ @ParameterizedTest @CsvSource({ - "/interpretations[0]/diagnosis/genomicInterpretations[0]/subjectOrBiosampleId, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].subjectOrBiosampleId: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/interpretationStatus, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].interpretationStatus: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/subjectOrBiosampleId, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].subjectOrBiosampleId' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/interpretationStatus, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].interpretationStatus' is missing but it is required", // TODO - as of now this leads to 2 errors instead of just one // "/interpretations[0]/diagnosis/genomicInterpretations[0]/interpretationStatus, SET[gibberish], '$.interpretations[0].diagnosis.genomicInterpretations[0].interpretationStatus: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].gene: is missing but it is required|$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[1].gene: is missing but it is required|$.interpretations[0].diagnosis.genomicInterpretations[1].variantInterpretation: is missing but it is required'", + 
"/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].gene' is missing but it is required|'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[1].gene' is missing but it is required|'interpretations[0].diagnosis.genomicInterpretations[1].variantInterpretation' is missing but it is required", }) public void checkGenomicInterpretationConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -202,8 +202,8 @@ public void checkGenomicInterpretationConstraints(String path, String action, St */ @ParameterizedTest @CsvSource({ - "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/valueId, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[1].gene.valueId: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/symbol, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[1].gene.symbol: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/valueId, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[1].gene.valueId' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[1]/gene/symbol, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[1].gene.symbol' is missing but it is required", }) public void checkGeneDescriptorConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -215,9 +215,9 @@ public void checkGeneDescriptorConstraints(String path, String action, String ex */ @ParameterizedTest @CsvSource({ - 
"/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/acmgPathogenicityClassification, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.acmgPathogenicityClassification: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/therapeuticActionability, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.therapeuticActionability: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/acmgPathogenicityClassification, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.acmgPathogenicityClassification' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/therapeuticActionability, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.therapeuticActionability' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor' is missing but it is required", }) public void checkVariantInterpretationConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -237,7 +237,7 @@ public void checkVariantInterpretationConstraints(String path, String action, St */ @ParameterizedTest @CsvSource({ - "/diseases[0]/term, DELETE, '$.diseases[0].term: is missing but it is required'", + "/diseases[0]/term, DELETE, 'diseases[0].term' is missing but it is required", }) 
public void checkDiseaseConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -248,10 +248,10 @@ public void checkDiseaseConstraints(String path, String action, String expected) */ @ParameterizedTest @CsvSource({ - "/medicalActions[0]/procedure, DELETE, '$.medicalActions[0].procedure: is missing but it is required|$.medicalActions[0].treatment: is missing but it is required|$.medicalActions[0].radiationTherapy: is missing but it is required|$.medicalActions[0].therapeuticRegimen: is missing but it is required'", - "/medicalActions[1]/treatment, DELETE, '$.medicalActions[1].procedure: is missing but it is required|$.medicalActions[1].treatment: is missing but it is required|$.medicalActions[1].radiationTherapy: is missing but it is required|$.medicalActions[1].therapeuticRegimen: is missing but it is required'", - "/medicalActions[2]/radiationTherapy, DELETE, '$.medicalActions[2].procedure: is missing but it is required|$.medicalActions[2].treatment: is missing but it is required|$.medicalActions[2].radiationTherapy: is missing but it is required|$.medicalActions[2].therapeuticRegimen: is missing but it is required'", - "/medicalActions[3]/therapeuticRegimen, DELETE, '$.medicalActions[3].procedure: is missing but it is required|$.medicalActions[3].treatment: is missing but it is required|$.medicalActions[3].radiationTherapy: is missing but it is required|$.medicalActions[3].therapeuticRegimen: is missing but it is required'", + "/medicalActions[0]/procedure, DELETE, 'medicalActions[0].procedure' is missing but it is required|'medicalActions[0].treatment' is missing but it is required|'medicalActions[0].radiationTherapy' is missing but it is required|'medicalActions[0].therapeuticRegimen' is missing but it is required", + "/medicalActions[1]/treatment, DELETE, 'medicalActions[1].procedure' is missing but it is required|'medicalActions[1].treatment' is missing but it is 
required|'medicalActions[1].radiationTherapy' is missing but it is required|'medicalActions[1].therapeuticRegimen' is missing but it is required", + "/medicalActions[2]/radiationTherapy, DELETE, 'medicalActions[2].procedure' is missing but it is required|'medicalActions[2].treatment' is missing but it is required|'medicalActions[2].radiationTherapy' is missing but it is required|'medicalActions[2].therapeuticRegimen' is missing but it is required", + "/medicalActions[3]/therapeuticRegimen, DELETE, 'medicalActions[3].procedure' is missing but it is required|'medicalActions[3].treatment' is missing but it is required|'medicalActions[3].radiationTherapy' is missing but it is required|'medicalActions[3].therapeuticRegimen' is missing but it is required", }) public void checkMedicalActionConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -262,7 +262,7 @@ public void checkMedicalActionConstraints(String path, String action, String exp */ @ParameterizedTest @CsvSource({ - "/medicalActions[0]/procedure/code, DELETE, '$.medicalActions[0].procedure.code: is missing but it is required'" + "/medicalActions[0]/procedure/code, DELETE, 'medicalActions[0].procedure.code' is missing but it is required" }) public void checkProcedureConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -273,7 +273,7 @@ public void checkProcedureConstraints(String path, String action, String expecte */ @ParameterizedTest @CsvSource({ - "/medicalActions[1]/treatment/agent, DELETE, '$.medicalActions[1].treatment.agent: is missing but it is required'" + "/medicalActions[1]/treatment/agent, DELETE, 'medicalActions[1].treatment.agent' is missing but it is required" }) public void checkTreatmentConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -285,10 +285,10 
@@ public void checkTreatmentConstraints(String path, String action, String expecte */ @ParameterizedTest @CsvSource({ - "/medicalActions[2]/radiationTherapy/modality, DELETE, '$.medicalActions[2].radiationTherapy.modality: is missing but it is required'", - "/medicalActions[2]/radiationTherapy/bodySite, DELETE, '$.medicalActions[2].radiationTherapy.bodySite: is missing but it is required'", - "/medicalActions[2]/radiationTherapy/dosage, DELETE, '$.medicalActions[2].radiationTherapy.dosage: is missing but it is required'", - "/medicalActions[2]/radiationTherapy/fractions, DELETE, '$.medicalActions[2].radiationTherapy.fractions: is missing but it is required'" + "/medicalActions[2]/radiationTherapy/modality, DELETE, 'medicalActions[2].radiationTherapy.modality' is missing but it is required", + "/medicalActions[2]/radiationTherapy/bodySite, DELETE, 'medicalActions[2].radiationTherapy.bodySite' is missing but it is required", + "/medicalActions[2]/radiationTherapy/dosage, DELETE, 'medicalActions[2].radiationTherapy.dosage' is missing but it is required", + "/medicalActions[2]/radiationTherapy/fractions, DELETE, 'medicalActions[2].radiationTherapy.fractions' is missing but it is required" }) public void checkRadiationTherapyConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -300,9 +300,9 @@ public void checkRadiationTherapyConstraints(String path, String action, String */ @ParameterizedTest @CsvSource({ - "/medicalActions[3]/therapeuticRegimen/externalReference, DELETE, '$.medicalActions[3].therapeuticRegimen.ontologyClass: is missing but it is required|$.medicalActions[3].therapeuticRegimen.externalReference: is missing but it is required'", - "/medicalActions[4]/therapeuticRegimen/ontologyClass, DELETE, '$.medicalActions[4].therapeuticRegimen.ontologyClass: is missing but it is required|$.medicalActions[4].therapeuticRegimen.externalReference: is missing but it is required'", - 
"/medicalActions[3]/therapeuticRegimen/regimenStatus, DELETE, '$.medicalActions[3].therapeuticRegimen.regimenStatus: is missing but it is required'" + "/medicalActions[3]/therapeuticRegimen/externalReference, DELETE, 'medicalActions[3].therapeuticRegimen.ontologyClass' is missing but it is required|'medicalActions[3].therapeuticRegimen.externalReference' is missing but it is required", + "/medicalActions[4]/therapeuticRegimen/ontologyClass, DELETE, 'medicalActions[4].therapeuticRegimen.ontologyClass' is missing but it is required|'medicalActions[4].therapeuticRegimen.externalReference' is missing but it is required", + "/medicalActions[3]/therapeuticRegimen/regimenStatus, DELETE, 'medicalActions[3].therapeuticRegimen.regimenStatus' is missing but it is required" }) public void checkTherapeuticRegimenConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -314,7 +314,7 @@ public void checkTherapeuticRegimenConstraints(String path, String action, Strin */ @ParameterizedTest @CsvSource({ - "/files[0]/uri, DELETE, '$.files[0].uri: is missing but it is required'", + "/files[0]/uri, DELETE, 'files[0].uri' is missing but it is required", }) public void checkFileConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -326,10 +326,10 @@ public void checkFileConstraints(String path, String action, String expected) { */ @ParameterizedTest @CsvSource({ - "/metaData/created, DELETE, '$.metaData.created: is missing but it is required'", - "/metaData/createdBy, DELETE, '$.metaData.createdBy: is missing but it is required'", - "/metaData/resources[*], DELETE, '$.metaData.resources: there must be a minimum of 1 items in the array'", - "/metaData/phenopacketSchemaVersion, DELETE, '$.metaData.phenopacketSchemaVersion: is missing but it is required'", + "/metaData/created, DELETE, 'metaData.created' is missing but it is 
required", + "/metaData/createdBy, DELETE, 'metaData.createdBy' is missing but it is required", + "/metaData/resources[*], DELETE, 'metaData.resources' there must be a minimum of 1 items in the array", + "/metaData/phenopacketSchemaVersion, DELETE, 'metaData.phenopacketSchemaVersion' is missing but it is required", }) public void checkMetaDataConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -341,12 +341,12 @@ public void checkMetaDataConstraints(String path, String action, String expected */ @ParameterizedTest @CsvSource({ - "/metaData/resources[0]/id, DELETE, '$.metaData.resources[0].id: is missing but it is required'", - "/metaData/resources[0]/name, DELETE, '$.metaData.resources[0].name: is missing but it is required'", - "/metaData/resources[0]/namespacePrefix, DELETE, '$.metaData.resources[0].namespacePrefix: is missing but it is required'", - "/metaData/resources[0]/url, DELETE, '$.metaData.resources[0].url: is missing but it is required'", - "/metaData/resources[0]/version, DELETE, '$.metaData.resources[0].version: is missing but it is required'", - "/metaData/resources[0]/iriPrefix, DELETE, '$.metaData.resources[0].iriPrefix: is missing but it is required'", + "/metaData/resources[0]/id, DELETE, 'metaData.resources[0].id' is missing but it is required", + "/metaData/resources[0]/name, DELETE, 'metaData.resources[0].name' is missing but it is required", + "/metaData/resources[0]/namespacePrefix, DELETE, 'metaData.resources[0].namespacePrefix' is missing but it is required", + "/metaData/resources[0]/url, DELETE, 'metaData.resources[0].url' is missing but it is required", + "/metaData/resources[0]/version, DELETE, 'metaData.resources[0].version' is missing but it is required", + "/metaData/resources[0]/iriPrefix, DELETE, 'metaData.resources[0].iriPrefix' is missing but it is required", }) public void checkResourceConstraints(String path, String action, String expected) { 
testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -358,7 +358,7 @@ public void checkResourceConstraints(String path, String action, String expected */ @ParameterizedTest @CsvSource({ - "/metaData/updates[0]/timestamp, DELETE, '$.metaData.updates[0].timestamp: is missing but it is required'", + "/metaData/updates[0]/timestamp, DELETE, 'metaData.updates[0].timestamp' is missing but it is required", }) public void checkUpdateConstraints(String path, String action, String expected) { testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); @@ -406,11 +406,11 @@ public void validFamilyYieldsNoErrors() { @ParameterizedTest @CsvSource({ - "/id, DELETE, '$.id: is missing but it is required'", - "/proband, DELETE, '$.proband: is missing but it is required'", - "/consanguinousParents, DELETE, '$.consanguinousParents: is missing but it is required'", - "/pedigree, DELETE, '$.pedigree: is missing but it is required'", - "/metaData, DELETE, '$.metaData: is missing but it is required'", + "/id, DELETE, 'id' is missing but it is required", + "/proband, DELETE, 'proband' is missing but it is required", + "/consanguinousParents, DELETE, 'consanguinousParents' is missing but it is required", + "/pedigree, DELETE, 'pedigree' is missing but it is required", + "/metaData, DELETE, 'metaData' is missing but it is required", }) public void absenceOfTopLevelFamilyElementsYieldsErrors(String path, String action, String expected) { testErrors(runner, readExampleFamilyNode(), path, action, expected); @@ -418,8 +418,8 @@ public void absenceOfTopLevelFamilyElementsYieldsErrors(String path, String acti @ParameterizedTest @CsvSource({ - "/pedigree/persons, DELETE, '$.pedigree.persons: is missing but it is required'", - "/pedigree/persons[*], DELETE, '$.pedigree.persons: there must be a minimum of 1 items in the array'", + "/pedigree/persons, DELETE, 'pedigree.persons' is missing but it is required", + "/pedigree/persons[*], DELETE, 'pedigree.persons' 
there must be a minimum of 1 items in the array", }) public void emptyPedigreeYieldsError(String path, String action, String expected) { testErrors(runner, readExampleFamilyNode(), path, action, expected); @@ -462,9 +462,9 @@ public class RequiredFieldsTest { */ @ParameterizedTest @CsvSource({ - "/id, DELETE, '$.id: is missing but it is required'", - "/members[*], DELETE, '$.members: there must be a minimum of 1 items in the array'", - "/metaData, DELETE, '$.metaData: is missing but it is required'", + "/id, DELETE, 'id' is missing but it is required", + "/members[*], DELETE, 'members' there must be a minimum of 1 items in the array", + "/metaData, DELETE, 'metaData' is missing but it is required", }) public void checkCohortConstraints(String path, String action, String expected) { testErrors(runner, readExampleCohortNode(), path, action, expected); diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaDiseaseValidatorTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaDiseaseValidatorTest.java index 7be1357d..99b342fc 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaDiseaseValidatorTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaDiseaseValidatorTest.java @@ -87,7 +87,7 @@ public void testLacksId() throws Exception { assertEquals(1, errors.size()); ValidationResult error = errors.get(0); // Assertions.assertEquals(JsonError.REQUIRED, error.category()); - assertEquals("$.id: is missing but it is required", error.message()); + assertEquals("'id' is missing but it is required", error.message()); } private static JsonNode mapPhenopacketToJsonNode(PhenopacketOrBuilder phenopacket) throws Exception { diff --git 
a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidatorTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidatorTest.java index 7b7b0bab..f0194326 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidatorTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/impl/JsonSchemaValidatorTest.java @@ -56,7 +56,7 @@ public void testValidationOfSimpleValidPhenopacket() throws Exception { assertEquals(1, errors.size()); ValidationResult error = errors.get(0); Assertions.assertEquals("required", error.category()); - assertEquals("$.id: is missing but it is required", error.message()); + assertEquals("'id' is missing but it is required", error.message()); } /** @@ -74,14 +74,14 @@ public void testValidationOfSimpleInValidPhenopacket() throws Exception { ValidationResult error = errors.get(0); // JsonError.CATEGORY is "JSON" assertEquals("required", error.category()); - assertEquals("$.id: is missing but it is required", error.message()); + assertEquals("'id' is missing but it is required", error.message()); error = errors.get(1); assertEquals("required", error.category()); - assertEquals("$.metaData: is missing but it is required", error.message()); + assertEquals("'metaData' is missing but it is required", error.message()); error = errors.get(2); assertEquals("additionalProperties", error.category()); - assertEquals("$.disney: is not defined in the schema and the schema does not allow additional properties", error.message()); + assertEquals("'disney' is not defined in the schema and the schema does not allow additional properties", error.message()); } @Test @@ -130,7 +130,7 @@ public void invalidEnum() throws JsonProcessingException { assertEquals(1, 
errors.size()); ValidationResult error = errors.get(0); assertEquals("enum", error.category()); - assertEquals("$.subject.sex: does not have a value in the enumeration [UNKNOWN_SEX, FEMALE, MALE, OTHER_SEX]", error.message()); + assertEquals("'subject.sex' does not have a value in the enumeration [UNKNOWN_SEX, FEMALE, MALE, OTHER_SEX]", error.message()); assertEquals(ValidationLevel.ERROR, error.level()); } From 466f335a10612c9624f1ba36d196d83891b199b6 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 8 Nov 2022 11:58:23 -0500 Subject: [PATCH 59/79] Relax validation of VRS-like `Variation` struct. Signed-off-by: Daniel Danis --- .../validator/jsonschema/README.md | 24 + .../validator/jsonschema/vrsatile.json | 7 +- ...sonSchemaValidationWorkflowRunnerTest.java | 26 +- .../validator/jsonschema/TestData.java | 6 + .../validator/jsonschema/retinoblastoma.json | 461 ++++++++++++++++++ 5 files changed, 512 insertions(+), 12 deletions(-) create mode 100644 phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md create mode 100644 phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md new file mode 100644 index 00000000..0b5cc126 --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md @@ -0,0 +1,24 @@ +# README + +This folder contains JSON schemas for validating top-level Phenopacket schema elements and VRS elements. 
+ +## VRSATILE notes + +The datatype of the `VcfRecord.pos` field in `vrsatile.proto` is: +``` +uint64 pos = 3; +``` + +Since Protobuf's `JSONFormat` serializes `uint64` fields into a JSON `string` instead of JSON `number`, +the JSON type of the `VcfRecord.pos` field is a: + +``` + "type": "string", + "pattern": "^[1-9][0-9]*$" +``` + +instead of a more straightforward: + +``` +"type": "integer" +``` diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json index 1af561e1..0fbbb838 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft/2019-09/schema#", + "$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://www.ga4gh.org/phenopackets", "title": "VRS Added Tools for Interoperable Loquacious Exchange", "description": "VRSATILE: A set of proposed extensions for GA4GH's Variation Representation Specification (VRS) to enable interoperable exchange of common descriptive data alongside variation concepts", @@ -58,7 +58,8 @@ }, "pos" : { "description": "position on the chromosome (VCF convention)", - "type": "integer" + "type": "string", + "pattern": "^[1-9][0-9]*$" }, "id" : { "description": "identifier as used in VCF line", @@ -97,7 +98,7 @@ "type": "string" }, "variation": { - "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json#/definitions/Variation", + "type": "object", "description": "The VRS Variation object" }, "label": { diff --git 
a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index cf885756..a98749ae 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; +import java.nio.file.Path; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -223,14 +224,13 @@ public void checkVariantInterpretationConstraints(String path, String action, St testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); } -// TODO - implement tests -// @ParameterizedTest -// @CsvSource({ -// "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor: is missing but it is required'", -// }) -// public void checkVariationDescriptorConstraints(String path, String action, String expected) { -// testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); -// } + @ParameterizedTest + @CsvSource({ + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor: is missing but it is required'", + }) + public void checkVariationDescriptorConstraints(String path, String action, String expected) { + testErrors(runner, readRetinoblastomaPhenopacketNode(), 
path, action, expected); + } /** * Absence of `term` leads to an {@link org.phenopackets.phenopackettools.validator.core.ValidationLevel#ERROR}. @@ -372,7 +372,15 @@ public class RecommendedFieldsTest { } private static JsonNode readBethlemPhenopacketNode() { - try (InputStream is = Files.newInputStream(TestData.BETHLEM_MYOPATHY_PHENOPACKET_JSON)){ + return readJsonTree(TestData.BETHLEM_MYOPATHY_PHENOPACKET_JSON); + } + + private static JsonNode readRetinoblastomaPhenopacketNode() { + return readJsonTree(TestData.RETINOBLASTOMA_PHENOPACKET_JSON); + } + + private static JsonNode readJsonTree(Path jsonPath) { + try (InputStream is = Files.newInputStream(jsonPath)){ return MAPPER.readTree(is); } catch (IOException e) { throw new RuntimeException(e); diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java index 94356a47..615e29f2 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java @@ -18,6 +18,12 @@ public class TestData { */ public static final Path BETHLEM_MYOPATHY_PHENOPACKET_JSON = TEST_BASE_DIR.resolve("bethlem-myopathy.json"); + /** + * A path to an example phenopacket representing a case of retinoblastoma. The phenopacket is useful since + * it contains a VRS-like Variation object. 
+ */ + public static final Path RETINOBLASTOMA_PHENOPACKET_JSON = TEST_BASE_DIR.resolve("retinoblastoma.json"); + /** * A path to an example family that, despite being medically invalid/nonsense, is complete from the testing diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json new file mode 100644 index 00000000..f0228bbf --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json @@ -0,0 +1,461 @@ +{ + "id": "arbitrary.id", + "subject": { + "id": "proband A", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P6M" + } + }, + "sex": "FEMALE", + "karyotypicSex": "XX" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0030084", + "label": "Clinodactyly" + }, + "modifiers": [{ + "id": "HP:0012834", + "label": "Right" + }], + "onset": { + "age": { + "iso8601duration": "P3M" + } + } + }, { + "type": { + "id": "HP:0000555", + "label": "Leukocoria" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P4M" + } + } + }, { + "type": { + "id": "HP:0000486", + "label": "Strabismus" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P5M15D" + } + } + }, { + "type": { + "id": "HP:0000541", + "label": "Retinal detachment" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:79893-4", + "label": "Left eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 25.0, + "referenceRange": { + "unit": { + "id": 
"56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }, { + "assay": { + "id": "LOINC:79892-6", + "label": "Right eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 15.0, + "referenceRange": { + "unit": { + "id": "56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "biosamples": [{ + "id": "biosample.1", + "sampledTissue": { + "id": "UBERON:0000970", + "label": "eye" + }, + "phenotypicFeatures": [{ + "type": { + "id": "NCIT:C35941", + "label": "Flexner-Wintersteiner Rosette Formation" + } + }, { + "type": { + "id": "NCIT:C132485", + "label": "Apoptosis and Necrosis" + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:33728-7", + "label": "Size.maximum dimension in Tumor" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm", + "label": "millimeter" + }, + "value": 15.0 + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P8M2W" + } + } + }], + "tumorProgression": { + "id": "NCIT:C8509", + "label": "Primary Neoplasm" + }, + "pathologicalTnmFinding": [{ + "id": "NCIT:C140720", + "label": "Retinoblastoma pT3 TNM Finding v8" + }, { + "id": "NCIT:C140711", + "label": "Retinoblastoma pN0 TNM Finding v8" + }], + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "files": [{ + "uri": "file://data/fileSomaticWgs.vcf.gz", + "individualToFileIdentifiers": { + "biosample.1": "specimen.1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }] + }], + "interpretations": [{ + "id": "interpretation.id", + "progressStatus": 
"SOLVED", + "diagnosis": { + "disease": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "proband A", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "variation": { + "copyNumber": { + "allele": { + "sequenceLocation": { + "sequenceId": "refseq:NC_000013.14", + "sequenceInterval": { + "startNumber": { + "value": "25981249" + }, + "endNumber": { + "value": "61706822" + } + } + } + }, + "number": { + "value": "1" + } + } + }, + "extensions": [{ + "name": "mosaicism", + "value": "40.0%" + }] + } + } + }, { + "subjectOrBiosampleId": "biosample.1", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "rs121913300", + "variation": { + "allele": { + "sequenceLocation": { + "sequenceId": "refseq:NC_000013.11", + "sequenceInterval": { + "startNumber": { + "value": "48367511" + }, + "endNumber": { + "value": "48367512" + } + } + }, + "literalSequenceExpression": { + "sequence": "T" + } + } + }, + "label": "RB1 c.958C\u003eT (p.Arg320Ter)", + "geneContext": { + "valueId": "HGNC:9884", + "symbol": "RB1" + }, + "expressions": [{ + "syntax": "hgvs.c", + "value": "NM_000321.2:c.958C\u003eT" + }, { + "syntax": "transcript_reference", + "value": "NM_000321.2" + }], + "vcfRecord": { + "genomeAssembly": "GRCh38", + "chrom": "NC_000013.11", + "pos": "48367512", + "ref": "C", + "alt": "T" + }, + "extensions": [{ + "name": "allele-frequency", + "value": "25.0%" + }], + "moleculeContext": "genomic", + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "onset": { + "age": { + "iso8601duration": "P4M" + } + }, + 
"diseaseStage": [{ + "id": "LOINC:LA24739-7", + "label": "Group E" + }], + "clinicalTnmFinding": [{ + "id": "NCIT:C140678", + "label": "Retinoblastoma cM0 TNM Finding v8" + }], + "primarySite": { + "id": "UBERON:0004548", + "label": "left eye" + } + }], + "medicalActions": [{ + "treatment": { + "agent": { + "id": "DrugCentral:1678", + "label": "melphalan" + }, + "routeOfAdministration": { + "id": "NCIT:C38222", + "label": "Intraarterial Route of Administration" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "UCUM:mg.kg-1", + "label": "milligram per kilogram" + }, + "value": 0.4 + }, + "scheduleFrequency": { + "id": "NCIT:C64576", + "label": "Once" + }, + "interval": { + "start": "2020-09-02T00:00:00Z", + "end": "2020-09-02T00:00:00Z" + } + }] + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + }, + "adverseEvents": [{ + "id": "HP:0025637", + "label": "Vasospasm" + }], + "treatmentTerminationReason": { + "id": "NCIT:C41331", + "label": "Adverse Event" + } + }, { + "therapeuticRegimen": { + "ontologyClass": { + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" + }, + "startTime": { + "age": { + "iso8601duration": "P7M" + } + }, + "endTime": { + "age": { + "iso8601duration": "P8M" + } + }, + "regimenStatus": "COMPLETED" + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }, { + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }], + "files": [{ + "uri": "file://data/germlineWgs.vcf.gz", + "individualToFileIdentifiers": { + 
"proband A": "sample1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "21.05d", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "3.34.0", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "2021-07-27", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2021-06-10", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file From c554675777daa2d93ae76e47d39245aeb4dbec7b Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Tue, 8 Nov 2022 14:02:48 -0500 Subject: [PATCH 60/79] adding UCUM, LOINC, DrugCentral resource builders --- .../phenopackettools/builder/builders/Resources.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java index ff5c3d0e..c527b41c 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/Resources.java @@ -77,6 
+77,9 @@ private Resources() { .setUrl("http://purl.obolibrary.org/obo/so.owl") .setIriPrefix("http://purl.obolibrary.org/obo/SO_"); + /** this is the version of the UCUM schema that has been valid since 2017-11-21 */ + private static final String DEFAULT_UCUM_VERSION = "2.1"; + private static final Resource.Builder UO_BUILDER = Resource.newBuilder() .setId("uo") .setName("Units of measurement ontology") @@ -96,7 +99,7 @@ private Resources() { .setName("Unified Code for Units of Measure") .setNamespacePrefix("UCUM") .setUrl("https://ucum.org") - .setIriPrefix("https://ucum.org/"); // TODO - we need to get a real prefix + .setIriPrefix("https://ucum.org/"); private static final Resource.Builder LOINC_BUILDER = Resource.newBuilder() .setId("loinc") @@ -162,6 +165,10 @@ public static Resource ucumVersion(String version) { return UCUM_BUILDER.setVersion(version).build(); } + public static Resource ucum() { + return ucumVersion(DEFAULT_UCUM_VERSION); + } + public static Resource loincVersion(String version) { return LOINC_BUILDER.setVersion(version).build(); } From c988c3bcd24430f092c1c5e1ca5b06825cdd9edb Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 12:18:50 -0500 Subject: [PATCH 61/79] Replace `vrs.json` with `Variation` adapter. 
Signed-off-by: Daniel Danis --- .../jsonschema/vrs-variation-adapter.json | 470 +++++++++ .../validator/jsonschema/vrs.json | 980 ------------------ .../validator/jsonschema/vrsatile.json | 6 +- 3 files changed, 473 insertions(+), 983 deletions(-) create mode 100644 phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json delete mode 100644 phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json new file mode 100644 index 00000000..8e7dcd02 --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json @@ -0,0 +1,470 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://www.ga4gh.org/phenopackets/vrs-facade", + "title": "VRS Variation facade", + "description": "An adapter for the VRS Variation representation that is embedded into Phenopacket schema. 
Note that the adapter does not map 1:1 to VRS Variation.", + "type": "object", + "properties": { + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "copyNumber": { + "$ref": "#/definitions/CopyNumber" + }, + "text": { + "$ref": "#/definitions/Text" + }, + "variationSet": { + "$ref": "#/definitions/VariationSet" + } + }, + "oneOf": [ + { "required": [ "allele" ]}, + { "required": [ "haplotype" ]}, + { "required": [ "copyNumber" ]}, + { "required": [ "text" ]}, + { "required": [ "variationSet"]} + ], + "additionalProperties": false, + + + "definitions": { + "Allele": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "curie": { + "type": "string" + }, + "chromosomeLocation": { + "$ref": "#/definitions/ChromosomeLocation" + }, + "sequenceLocation": { + "$ref": "#/definitions/SequenceLocation" + }, + + "sequenceState": { + "$ref": "#/definitions/SequenceState" + }, + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "repeatedSequenceExpression": { + "$ref": "#/definitions/RepeatedSequenceExpression" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["curie"] }, + { "required": ["chromosomeLocation"] }, + { "required": ["sequenceLocation"] } + ] + }, { + "oneOf": [ + { "required": ["sequenceState"] }, + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] }, + { "required": ["repeatedSequenceExpression"] } + ] + } + ], + "additionalProperties": false + }, + + "Haplotype": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "members": { + "type": "array", + "items": { + "$ref": "#/definitions/HaplotypeMember" + } + } + }, + "additionalProperties": false + }, + + "HaplotypeMember": { + "type": "object", + "description": "A utility object for representing Haplotype.Member of vrs.proto", + 
"properties": { + "allele": { + "$ref": "#/definitions/Allele" + }, + "curie": { + "type": "string" + } + }, + "oneOf": [ + { "required": ["allele"] }, + { "required": ["curie"] } + ], + "additionalProperties": false + }, + + "CopyNumber": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "gene": { + "$ref": "#/definitions/Gene" + }, + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "repeatedSequenceExpression": { + "$ref": "#/definitions/RepeatedSequenceExpression" + }, + "curie": { + "type": "string" + }, + + "number": { + "$ref": "#/definitions/Number" + }, + "indefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "definiteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["allele"] }, + { "required": ["haplotype"] }, + { "required": ["gene"] }, + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] }, + { "required": ["repeatedSequenceExpression"] }, + { "required": ["curie"] } + ] + }, { + "oneOf": [ + { "required": ["number"] }, + { "required": ["indefiniteRange"] }, + { "required": ["definiteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "Text": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "definition": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "VariationSet": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "members": { + "type": "array", + "items": { + "$ref": "#/definitions/VariationSetMember" + } + } + }, + "additionalProperties": false + }, + + "VariationSetMember": { + "type": "object", + "description": "A utility object for representing VariationSet.Member of vrs.proto", + 
"properties": { + "curie": { + "type": "string" + }, + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "copyNumber": { + "$ref": "#/definitions/CopyNumber" + }, + "text": { + "$ref": "#/definitions/Text" + }, + "variationSet": { + "$ref": "#/definitions/VariationSet" + } + }, + "oneOf": [ + { "required": ["curie"] }, + { "required": ["allele"] }, + { "required": ["haplotype"] }, + { "required": ["copyNumber"] }, + { "required": ["text"] }, + { "required": ["variationSet"] } + ], + "additionalProperties": false + }, + + "ChromosomeLocation": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "speciesId": { + "type": "string" + }, + "chr": { + "type": "string" + }, + "interval": { + "$ref": "#/definitions/CytobandInterval" + } + }, + "additionalProperties": false + }, + + "SequenceLocation": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "sequenceId": { + "type": "string" + }, + "sequenceInterval": { + "$ref": "#/definitions/SequenceInterval" + }, + "simpleInterval": { + "$ref": "#/definitions/SimpleInterval" + } + }, + "oneOf": [ + { "required": ["sequenceInterval"] }, + { "required": ["simpleInterval"] } + ], + "additionalProperties": false + }, + + "SequenceInterval": { + "type": "object", + "properties": { + "startNumber": { + "$ref": "#/definitions/Number" + }, + "startIndefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "startDefiniteRange": { + "$ref": "#/definitions/DefiniteRange" + }, + "endNumber": { + "$ref": "#/definitions/Number" + }, + "endIndefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "endDefiniteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["startNumber"] }, + { "required": ["startIndefiniteRange"] }, + { "required": ["startDefiniteRange"] } + ] + }, { + "oneOf": [ + { "required": ["endNumber"] }, + { "required": ["endIndefiniteRange"] }, 
+ { "required": ["endDefiniteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "SimpleInterval": { + "type": "object", + "properties": { + "start": { + "$ref": "#/definitions/UnsignedInt64" + }, + "end": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "CytobandInterval": { + "type": "object", + "properties": { + "start": { + "type": "string" + }, + "end": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "Gene": { + "type": "object", + "properties": { + "geneId": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "Number": { + "type": "object", + "properties": { + "value": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "IndefiniteRange": { + "type": "object", + "properties": { + "value": { + "$ref": "#/definitions/UnsignedInt64" + }, + "comparator": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "DefiniteRange": { + "type": "object", + "properties": { + "min": { + "$ref": "#/definitions/UnsignedInt64" + }, + "max": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "SequenceState": { + "type": "object", + "properties": { + "sequence": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "LiteralSequenceExpression": { + "type": "object", + "properties": { + "sequence": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "DerivedSequenceExpression": { + "type": "object", + "properties": { + "location": { + "$ref": "#/definitions/SequenceLocation" + }, + "reverseComplement": { + "type": "boolean" + } + }, + "additionalProperties": false + }, + + "RepeatedSequenceExpression": { + "type": "object", + "properties": { + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "number": { + "$ref": 
"#/definitions/Number" + }, + "indefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "definiteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] } + ] + }, { + "oneOf": [ + { "required": ["number"] }, + { "required": ["indefiniteRange"] }, + { "required": ["definiteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "UnsignedInt64": { + "type": "string", + "pattern": "^[0-9]+$", + "description": "A utility to represent Protobuf `uint64` data type" + } + } +} \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json deleted file mode 100644 index ca2366be..00000000 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json +++ /dev/null @@ -1,980 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "GA4GH-VRS-Definitions", - "type": "object", - "definitions": { - "Variation": { - "description": "The root class of all Variation types", - "oneOf": [ - { - "$ref": "#/definitions/MolecularVariation" - }, - { - "$ref": "#/definitions/SystemicVariation" - }, - { - "$ref": "#/definitions/UtilityVariation" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "MolecularVariation": { - "description": "A variation on a contiguous molecule.", - "oneOf": [ - { - "$ref": "#/definitions/Allele" - }, - { - "$ref": "#/definitions/Haplotype" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "UtilityVariation": { - "description": "Utility variation classes that cannot be constrained to a specific biological class of variation.", - "oneOf": [ - { - "$ref": "#/definitions/Text" - }, - 
{ - "$ref": "#/definitions/VariationSet" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "SystemicVariation": { - "description": "A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.", - "oneOf": [ - { - "$ref": "#/definitions/Abundance" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "Allele": { - "description": "The sequence state at a Location.", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Allele" - ], - "default": "Allele" - }, - "location": { - "oneOf": [ - { - "$ref": "#/definitions/CURIE" - }, - { - "$ref": "#/definitions/Location" - } - ] - }, - "state": { - "oneOf": [ - { - "$ref": "#/definitions/SequenceState" - }, - { - "$ref": "#/definitions/SequenceExpression" - } - ] - } - }, - "required": [ - "type", - "location", - "state" - ] - }, - "Haplotype": { - "description": "A set of zero or more Alleles", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Haplotype" - ], - "default": "Haplotype" - }, - "members": { - "type": "array", - "minItems": 1, - "uniqueItems": true, - "items": { - "oneOf": [ - { - "$ref": "#/definitions/Allele" - }, - { - "$ref": "#/definitions/CURIE" - } - ] - } - } - }, - "required": [ - "type", - "members" - ] - }, - "Text": { - "description": "A textual description of variation, typically not parseable but understood by humans.", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Text" - ], - "default": "Text" - }, - "definition": { - "type": "string", - "description": "An textual representation of variation intended to capture variation descriptions that cannot be parsed, but still treated 
as variation." - } - }, - "required": [ - "type", - "definition" - ] - }, - "VariationSet": { - "description": "A set of Variation objects.\nMembers may be specified inline or by reference (with CURIEs)", - "type": "object", - "additionalProperties": false, - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "VariationSet" - ], - "default": "VariationSet" - }, - "members": { - "type": "array", - "uniqueItems": true, - "items": { - "oneOf": [ - { - "$ref": "#/definitions/CURIE" - }, - { - "$ref": "#/definitions/Variation" - } - ] - } - } - }, - "required": [ - "type", - "members" - ] - }, - "Abundance": { - "description": "The quantity of a feature, variation, molecule or part thereof in a system.", - "oneOf": [ - { - "$ref": "#/definitions/CopyNumber" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "CopyNumber": { - "additionalProperties": false, - "type": "object", - "description": "The count of copies of a Feature, Location, or Molecular Variation subject within a genome.", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "CopyNumber" - ], - "default": "CopyNumber" - }, - "subject": { - "oneOf": [ - { - "$ref": "#/definitions/MolecularVariation" - }, - { - "$ref": "#/definitions/Feature" - }, - { - "$ref": "#/definitions/SequenceExpression" - }, - { - "$ref": "#/definitions/CURIE" - } - ] - }, - "copies": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "copies": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - 
"properties": { - "copies": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "copies": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "subject", - "copies" - ] - }, - "Location": { - "description": "A Location represents a span on a specific sequence.", - "oneOf": [ - { - "$ref": "#/definitions/ChromosomeLocation" - }, - { - "$ref": "#/definitions/SequenceLocation" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "ChromosomeLocation": { - "additionalProperties": false, - "description": "A region of a chromosomed specified by species and name using cytogenetic naming conventions", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "ChromosomeLocation" - ], - "default": "ChromosomeLocation" - }, - "_id": { - "$ref": "#/definitions/CURIE" - }, - "species_id": { - "$ref": "#/definitions/CURIE", - "default": "taxonomy:9606" - }, - "chr": { - "type": "string" - }, - "interval": { - "$ref": "#/definitions/CytobandInterval" - } - }, - "required": [ - "type", - "species_id", - "chr", - "interval" - ] - }, - "SequenceLocation": { - "additionalProperties": false, - "description": "A specified subsequence within another sequence that is used as a reference sequence.", - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "SequenceLocation" - ], - "default": "SequenceLocation" - }, - "sequence_id": { - "$ref": "#/definitions/CURIE" - }, - "interval": { - "oneOf": [ - { - "$ref": "#/definitions/SequenceInterval" - }, - { - "$ref": "#/definitions/SimpleInterval" - } - ] - } - }, - "required": [ - "type", - "sequence_id", - "interval" - ] - }, - 
"SequenceInterval": { - "description": "A SequenceInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates.\nSequenceInterval is intended to be compatible with that in Sequence Ontology ([SO:0000001](http://www.sequenceontology.org/browser/current_svn/term/SO:0000001)), with the exception that the GA4GH VRS SequenceInterval may be zero-width. The SO definition is for an \"extent greater than zero\".", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "SequenceInterval" - ], - "default": "SequenceInterval" - }, - "start": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - }, - "end": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "end": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - 
"properties": { - "end": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "end": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "start", - "end" - ] - }, - "CytobandInterval": { - "description": "A contiguous region specified by chromosomal bands features.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "CytobandInterval" - ], - "default": "CytobandInterval" - }, - "start": { - "$ref": "#/definitions/HumanCytoband" - }, - "end": { - "$ref": "#/definitions/HumanCytoband" - } - }, - "example": { - "type": "CytobandInterval", - "start": "q22.2", - "end": "q22.3" - }, - "required": [ - "type", - "start", - "end" - ] - }, - "SequenceExpression": { - "description": "One of a set of sequence representation syntaxes.", - "oneOf": [ - { - "$ref": "#/definitions/LiteralSequenceExpression" - }, - { - "$ref": "#/definitions/DerivedSequenceExpression" - }, - { - "$ref": "#/definitions/RepeatedSequenceExpression" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "LiteralSequenceExpression": { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "LiteralSequenceExpression" - ], - "default": "LiteralSequenceExpression" - }, - "sequence": { - "$ref": "#/definitions/Sequence" - } - }, - "required": [ - "type", - "sequence" - ] - }, - "DerivedSequenceExpression": { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "DerivedSequenceExpression" - ], - "default": 
"DerivedSequenceExpression" - }, - "location": { - "$ref": "#/definitions/SequenceLocation" - }, - "reverse_complement": { - "type": "boolean" - } - }, - "required": [ - "type", - "location", - "reverse_complement" - ] - }, - "RepeatedSequenceExpression": { - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "RepeatedSequenceExpression" - ], - "default": "RepeatedSequenceExpression" - }, - "seq_expr": { - "oneOf": [ - { - "$ref": "#/definitions/LiteralSequenceExpression" - }, - { - "$ref": "#/definitions/DerivedSequenceExpression" - } - ] - }, - "count": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "seq_expr", - "count" - ] - }, - "Feature": { - "description": "A named entity that can be mapped to a Location. 
Genes, protein domains, exons, and chromosomes are some examples of common biological entities that may be Features.", - "oneOf": [ - { - "$ref": "#/definitions/Gene" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "Gene": { - "description": "A reference to an external gene system, used as a location for variation. Currently, the `ncbigene` namespace is required. See https://registry.identifiers.org/registry/ncbigene.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "Gene" - ], - "default": "Gene" - }, - "gene_id": { - "$ref": "#/definitions/CURIE" - } - }, - "required": [ - "type", - "gene_id" - ] - }, - "Number": { - "description": "A simple number value as a VRS class.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "Number" - ], - "default": "Number" - }, - "value": { - "type": "number" - } - }, - "required": [ - "type", - "value" - ] - }, - "IndefiniteRange": { - "description": "An indefinite range represented as a number and associated comparator. 
The bound operator is interpreted as follows: '>=' are all values greater than and including the value, '<=' are all numbers less than and including the value.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "IndefiniteRange" - ], - "default": "IndefiniteRange" - }, - "value": { - "type": "number" - }, - "comparator": { - "type": "string", - "enum": [ - "<=", - ">=" - ] - } - }, - "required": [ - "type", - "value", - "comparator" - ] - }, - "DefiniteRange": { - "description": "A bounded, inclusive range of numbers.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "DefiniteRange" - ], - "default": "DefiniteRange" - }, - "min": { - "type": "number" - }, - "max": { - "type": "number" - } - }, - "required": [ - "type", - "min", - "max" - ] - }, - "Sequence": { - "additionalProperties": false, - "description": "A character string of residues that represents a biological sequence using the conventional sequence order (5\u2019-to-3\u2019 for nucleic acid sequences, and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity codes are permitted in Sequences.", - "type": "string", - "pattern": "^[A-Z*\\-]*$" - }, - "CURIE": { - "additionalProperties": false, - "description": "A string that refers to an object uniquely. The lifetime and scope of an id is defined by the sender.\nVRS does not impose any contraints on strings used as ids in messages. However, to maximize sharability of data, VRS RECOMMENDS that implementations use [W3C Compact URI (CURIE)](https://www.w3.org/TR/curie/) syntax.\nString CURIEs are represented as `prefix`:`reference` (W3C terminology), but often referred to as `namespace`:`accession` or `namespace`:`local id` colloquially.\nVRS also RECOMMENDS that `prefix` be defined in identifiers.org.\nThe `reference` component is an unconstrained string.\nA CURIE is a URI. 
URIs may *locate* objects (i.e., specify where to retrieve them) or *name* objects conceptually. VRS uses CURIEs primarily as a naming mechanism.\nImplementations MAY provide CURIE resolution mechanisms for prefixes to make these objects locatable.\nUsing internal ids in public messages is strongly discouraged.", - "type": "string", - "pattern": "^\\w[^:]*:.+$", - "example": "ensembl:ENSG00000139618" - }, - "HumanCytoband": { - "additionalProperties": false, - "description": "A interval on a stained metaphase chromosome specified by cytobands. CytobandIntervals include the regions described by the start and end cytobands.", - "type": "string", - "pattern": "^cen|[pq](ter|([1-9][0-9]*(\\.[1-9][0-9]*)?))$", - "example": "q22.3" - }, - "SequenceState": { - "deprecated": true, - "description": "DEPRECATED: An assertion of the state of a sequence, typically at a Sequence Location within an Allele.\nThis class is deprecated. Use LiteralSequenceExpression instead.", - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "SequenceState" - ], - "default": "SequenceState" - }, - "sequence": { - "$ref": "#/definitions/Sequence" - } - }, - "example": { - "type": "SequenceState", - "sequence": "C" - }, - "required": [ - "type", - "sequence" - ] - }, - "SimpleInterval": { - "deprecated": true, - "description": "DEPRECATED: A SimpleInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates.\nThis class is deprecated. 
Use SequenceInterval instead.", - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "SimpleInterval" - ], - "default": "SimpleInterval" - }, - "start": { - "type": "integer" - }, - "end": { - "type": "integer" - } - }, - "example": { - "type": "SimpleInterval", - "start": 11, - "end": 22 - }, - "required": [ - "type", - "start", - "end" - ] - } - } -} \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json index 0fbbb838..840000a4 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://www.ga4gh.org/phenopackets", + "$id": "https://www.ga4gh.org/phenopackets/vrsatile", "title": "VRS Added Tools for Interoperable Loquacious Exchange", "description": "VRSATILE: A set of proposed extensions for GA4GH's Variation Representation Specification (VRS) to enable interoperable exchange of common descriptive data alongside variation concepts", "type": "object", @@ -98,8 +98,8 @@ "type": "string" }, "variation": { - "type": "object", - "description": "The VRS Variation object" + "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json", + "description": "An adapter for the VRS Variation representation that is embedded into Phenopacket schema. Note that the adapter does not map 1:1 to VRS Variation." 
}, "label": { "type": "string", From c686ba0a91cb8ba33f64faf247d41ca14cf3915e Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 13:37:17 -0500 Subject: [PATCH 62/79] Rework CNV syntax in `Retinoblastoma` example. Signed-off-by: Daniel Danis --- .../examples/phenopackets/retinoblastoma.json | 498 ++++++++++++++++++ .../cli/examples/Retinoblastoma.java | 34 +- 2 files changed, 524 insertions(+), 8 deletions(-) create mode 100644 phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json diff --git a/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json b/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json new file mode 100644 index 00000000..966d7b64 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json @@ -0,0 +1,498 @@ +{ + "id": "arbitrary.id", + "subject": { + "id": "proband A", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P6M" + } + }, + "sex": "FEMALE", + "karyotypicSex": "XX" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0030084", + "label": "Clinodactyly" + }, + "modifiers": [{ + "id": "HP:0012834", + "label": "Right" + }], + "onset": { + "age": { + "iso8601duration": "P3M" + } + } + }, { + "type": { + "id": "HP:0000555", + "label": "Leukocoria" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P4M" + } + } + }, { + "type": { + "id": "HP:0000486", + "label": "Strabismus" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P5M15D" + } + } + }, { + "type": { + "id": "HP:0000541", + "label": "Retinal detachment" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:79893-4", + "label": "Left eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": 
"millimetres of mercury" + }, + "value": 25.0, + "referenceRange": { + "unit": { + "id": "LOINC:56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }, { + "assay": { + "id": "LOINC:79892-6", + "label": "Right eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 15.0, + "referenceRange": { + "unit": { + "id": "LOINC:56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "biosamples": [{ + "id": "biosample.1", + "sampledTissue": { + "id": "UBERON:0000970", + "label": "eye" + }, + "phenotypicFeatures": [{ + "type": { + "id": "NCIT:C35941", + "label": "Flexner-Wintersteiner Rosette Formation" + } + }, { + "type": { + "id": "NCIT:C132485", + "label": "Apoptosis and Necrosis" + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:33728-7", + "label": "Size.maximum dimension in Tumor" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm", + "label": "millimeter" + }, + "value": 15.0 + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P8M2W" + } + } + }], + "tumorProgression": { + "id": "NCIT:C8509", + "label": "Primary Neoplasm" + }, + "pathologicalTnmFinding": [{ + "id": "NCIT:C140720", + "label": "Retinoblastoma pT3 TNM Finding v8" + }, { + "id": "NCIT:C140711", + "label": "Retinoblastoma pN0 TNM Finding v8" + }], + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "files": [{ + "uri": "file://data/fileSomaticWgs.vcf.gz", + "individualToFileIdentifiers": { + "biosample.1": "specimen.1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": 
"VCF" + } + }] + }], + "interpretations": [{ + "id": "interpretation.id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "proband A", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "cnv-1", + "moleculeContext": "genomic", + "variation": { + "copyNumber": { + "derivedSequenceExpression": { + "location": { + "sequenceId": "refseq:NC_000013.14", + "sequenceInterval": { + "startNumber": { + "value": "25981249" + }, + "endNumber": { + "value": "61706822" + } + } + } + }, + "number": { + "value": "1" + } + } + }, + "extensions": [{ + "name": "mosaicism", + "value": "40.0%" + }] + } + } + }, { + "subjectOrBiosampleId": "biosample.1", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "rs121913300", + "variation": { + "allele": { + "sequenceLocation": { + "sequenceId": "refseq:NC_000013.11", + "sequenceInterval": { + "startNumber": { + "value": "48367511" + }, + "endNumber": { + "value": "48367512" + } + } + }, + "literalSequenceExpression": { + "sequence": "T" + } + } + }, + "label": "RB1 c.958C\u003eT (p.Arg320Ter)", + "geneContext": { + "valueId": "HGNC:9884", + "symbol": "RB1" + }, + "expressions": [{ + "syntax": "hgvs.c", + "value": "NM_000321.2:c.958C\u003eT" + }, { + "syntax": "transcript_reference", + "value": "NM_000321.2" + }], + "vcfRecord": { + "genomeAssembly": "GRCh38", + "chrom": "NC_000013.11", + "pos": "48367512", + "ref": "C", + "alt": "T" + }, + "extensions": [{ + "name": "allele-frequency", + "value": "25.0%" + }], + "moleculeContext": "genomic", + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } 
+ }], + "diseases": [{ + "term": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "onset": { + "age": { + "iso8601duration": "P4M" + } + }, + "diseaseStage": [{ + "id": "LOINC:LA24739-7", + "label": "Group E" + }], + "clinicalTnmFinding": [{ + "id": "NCIT:C140678", + "label": "Retinoblastoma cM0 TNM Finding v8" + }], + "primarySite": { + "id": "UBERON:0004548", + "label": "left eye" + } + }], + "medicalActions": [{ + "treatment": { + "agent": { + "id": "DrugCentral:1678", + "label": "melphalan" + }, + "routeOfAdministration": { + "id": "NCIT:C38222", + "label": "Intraarterial Route of Administration" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "UCUM:mg.kg-1", + "label": "milligram per kilogram" + }, + "value": 0.4 + }, + "scheduleFrequency": { + "id": "NCIT:C64576", + "label": "Once" + }, + "interval": { + "start": "2020-09-02T00:00:00Z", + "end": "2020-09-02T00:00:00Z" + } + }] + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + }, + "adverseEvents": [{ + "id": "HP:0025637", + "label": "Vasospasm" + }], + "treatmentTerminationReason": { + "id": "NCIT:C41331", + "label": "Adverse Event" + } + }, { + "therapeuticRegimen": { + "ontologyClass": { + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" + }, + "startTime": { + "age": { + "iso8601duration": "P7M" + } + }, + "endTime": { + "age": { + "iso8601duration": "P8M" + } + }, + "regimenStatus": "COMPLETED" + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }, { + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { 
+ "id": "NCIT:C62220", + "label": "Cure" + } + }], + "files": [{ + "uri": "file://data/germlineWgs.vcf.gz", + "individualToFileIdentifiers": { + "proband A": "sample1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "21.05d", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "3.34.0", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "2021-07-27", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2021-06-10", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, { + "id": "loinc", + "name": "Logical Observation Identifiers Names and Codes", + "url": "https://loinc.org", + "version": "2.7.3", + "namespacePrefix": "LOINC", + "iriPrefix": "https://loinc.org/" + }, { + "id": "ucum", + "name": "Unified Code for Units of Measure", + "url": "https://ucum.org", + "version": "2.1", + "namespacePrefix": "UCUM", + "iriPrefix": "https://ucum.org/" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "2022-03-05", + 
"namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "drugcentral", + "name": "Drug Central", + "url": "https://drugcentral.org/", + "version": "08/22/2022", + "namespacePrefix": "DrugCentral", + "iriPrefix": "https://drugcentral.org/drugcard/" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java index 69a46d8a..0a28729d 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java @@ -1,5 +1,7 @@ package org.phenopackets.phenopackettools.cli.examples; +import org.ga4gh.vrs.v1.*; +import org.ga4gh.vrs.v1.Number; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; import org.phenopackets.phenopackettools.builder.constants.Laterality; @@ -71,10 +73,10 @@ Interpretation interpretation() { * @return Genomic interpretation related to a somatic missense mutation in the RB1 gene. 
*/ GenomicInterpretation somaticRb1Missense() { - AlleleBuilder abuilder = AlleleBuilder.builder(); - abuilder.sequenceId("refseq:NC_000013.11"); - abuilder.interbaseStartEnd( 48367511, 48367512); - abuilder.altAllele("T"); + AlleleBuilder abuilder = AlleleBuilder.builder() + .sequenceId("refseq:NC_000013.11") + .interbaseStartEnd( 48367511, 48367512) + .altAllele("T"); VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder("rs121913300") .variation(abuilder.buildVariation()) .genomic() @@ -100,15 +102,31 @@ GenomicInterpretation somaticRb1Missense() { GenomicInterpretation germlineRb1Deletion() { - CopyNumberBuilder abuilder = CopyNumberBuilder.builder(); + CopyNumber cnv = CopyNumber.newBuilder() + .setDerivedSequenceExpression(DerivedSequenceExpression.newBuilder() + .setLocation(SequenceLocation.newBuilder() + .setSequenceId("refseq:NC_000013.14") + .setSequenceInterval(SequenceInterval.newBuilder() + .setStartNumber(Number.newBuilder(). + setValue(25981249) + .build()) + .setEndNumber(Number.newBuilder() + .setValue(61706822) + .build()) + .build()) + .build()) + .build()) + .setNumber(Number.newBuilder().setValue(1).build()) + .build(); //abuilder.copyNumberId("ga4gh:VCN.AFfJws1M4Lg8w1O3XknmHYc9TU2hHYpp"); // original coordinates in paper were given as 13q12.13q21.2(26,555,387–62,280,955 for hg19 //chr13 25981249 61706822 -- lifted over to hg38 + Variation variation = Variation.newBuilder() + .setCopyNumber(cnv) + .build(); - abuilder.alleleLocation("refseq:NC_000013.14",25981249, 61706822);//VRS uses inter-residue coordinates - abuilder.oneCopy(); VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder(); - vbuilder.variation(abuilder.buildVariation()); + vbuilder.variation(variation); vbuilder.mosaicism(40.0); VariantInterpretationBuilder vibuilder = VariantInterpretationBuilder.builder(vbuilder); vibuilder.pathogenic(); From f6c37ced846f0bd9f916706f3d8c6fe7b75d267f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 
Nov 2022 14:24:31 -0500 Subject: [PATCH 63/79] Add a phenopacket for demonstrating syntax validation. Signed-off-by: Daniel Danis --- .../src/examples/phenopackets/README.md | 28 +++++ .../examples/phenopackets/syntax-errors.json | 105 ++++++++++++++++++ .../src/examples/phenopackets/syntax.json | 3 - 3 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 phenopacket-tools-cli/src/examples/phenopackets/README.md create mode 100644 phenopacket-tools-cli/src/examples/phenopackets/syntax-errors.json delete mode 100644 phenopacket-tools-cli/src/examples/phenopackets/syntax.json diff --git a/phenopacket-tools-cli/src/examples/phenopackets/README.md b/phenopacket-tools-cli/src/examples/phenopackets/README.md new file mode 100644 index 00000000..749f2c0a --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenopackets/README.md @@ -0,0 +1,28 @@ +# README + +The folder contains a few phenopackets for demonstrating the validation functionality of *phenopacket-tools*. +The validator will report the validation issues, one issue per line. The next sections show different types +of validation errors that can be found using *phenopacket-tools*. + +## `syntax-errors.json` + +The `syntax-errors.json` is a phenopacket where several required attributes are missing. +The *phenopacket-tools* validator will point out the following issues: + +| Path | Message | Solution | +|:-------------------------------------|:-------------------------------|:---------------------------------------------------------------------| +| `$.id` | Is missing but it is required. | Add phenopacket ID. | +| `$.subject.id` | Is missing but it is required. | Add subject ID. | +| `$.phenotypicFeatures[0].type.label` | Is missing but it is required. | Add the `label` attribute into `$.phenotypicFeatures[0].type.label`. | + +## `semantic-errors.json` + +The `semantic-errors.json` is a phenopacket with no syntax errors. 
However, there are several semantic inconsistencies: + +| Path | Message | Solution | +|:------|:--------|:---------| +| | | | +| | | | + + +- `retinoblastoma.json` \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenopackets/syntax-errors.json b/phenopacket-tools-cli/src/examples/phenopackets/syntax-errors.json new file mode 100644 index 00000000..7d474f13 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenopackets/syntax-errors.json @@ -0,0 +1,105 @@ +{ + "subject": { + "dateOfBirth": "1998-01-01T00:00:00Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P3Y" + } + }, + "sex": "MALE", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001159" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }, { + "type": { + "id": "HP:0002090", + "label": "Pneumonia" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }, { + "type": { + "id": "HP:0000028", + "label": "Cryptorchidism" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }, { + "type": { + "id": "HP:0011109", + "label": "Chronic sinusitis" + }, + "severity": { + "id": "HP:0012828", + "label": "Severe" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003581", + "label": "Adult onset" + } + } + }], + "files": [{ + "uri": "file://data/file.vcf.gz", + "individualToFileIdentifiers": { + "kindred 1A": "SAME000234" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "vcf" + } + }], + "metaData": { + "created": "2021-07-01T19:32:35Z", + "createdBy": "HPO:probinson", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { 
+ "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2020-07-13", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + } ], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [{ + "id": "PMID:20842687", + "description": "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter." + }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenopackets/syntax.json b/phenopacket-tools-cli/src/examples/phenopackets/syntax.json deleted file mode 100644 index 0e0dcd23..00000000 --- a/phenopacket-tools-cli/src/examples/phenopackets/syntax.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - -} \ No newline at end of file From 3cc716196b6fb33ae5fc3359f4f7a4cc3dcbfd37 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 14:28:09 -0500 Subject: [PATCH 64/79] Update wording in the `README.md` of the JSON schema folder. 
Signed-off-by: Daniel Danis --- .../phenopackettools/validator/jsonschema/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md index 0b5cc126..c18f9455 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md @@ -1,6 +1,7 @@ # README -This folder contains JSON schemas for validating top-level Phenopacket schema elements and VRS elements. +This folder contains JSON schemas for validating top-level Phenopacket Schema elements and the `Variation` element +embedded in the Phenopacket Schema. ## VRSATILE notes @@ -9,12 +10,12 @@ The datatype of the `VcfRecord.pos` field in `vrsatile.proto` is: uint64 pos = 3; ``` -Since Protobuf's `JSONFormat` serializes `uint64` fields into a JSON `string` instead of JSON `number`, -the JSON type of the `VcfRecord.pos` field is a: +Since Protobuf's `JSONFormat` serializes `uint64` fields into a JSON `string` instead of a JSON `number`, +the JSON schema element for validation of the `VcfRecord.pos` field is: ``` - "type": "string", - "pattern": "^[1-9][0-9]*$" +"type": "string", +"pattern": "^[1-9][0-9]*$" ``` instead of a more straightforward: From 26ce08650982b6ae83a7deca523ff0a23bca437f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 15:54:54 -0500 Subject: [PATCH 65/79] Absence of a required field in `VariationDescriptor` leads to a validation error. Absence of a `oneof` field in `Variation` leads to an error. 
Signed-off-by: Daniel Danis --- ...sonSchemaValidationWorkflowRunnerTest.java | 23 ++++++++++++++++++- .../validator/jsonschema/retinoblastoma.json | 6 +++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index a98749ae..526ef33d 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -226,12 +226,33 @@ public void checkVariantInterpretationConstraints(String path, String action, St @ParameterizedTest @CsvSource({ - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/id, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.id: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/moleculeContext, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.moleculeContext: is missing but it is required'", }) public void checkVariationDescriptorConstraints(String path, String action, String expected) { testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expected); } + /** + * As of Nov 9, 2022, the 
{@link org.ga4gh.vrs.v1.Variation} validator does not check presence + * of required fields. The validator can only check presence of {@code oneof} fields. + *

+ * Note that the {@code path} is split into a prefix and sub-path to increase legibility of the + * test parameters. + */ + @ParameterizedTest + @CsvSource({ + "/variation/copyNumber, DELETE, 'HERE.allele: is missing but it is required|HERE.haplotype: is missing but it is required|HERE.copyNumber: is missing but it is required|HERE.text: is missing but it is required|HERE.variationSet: is missing but it is required'", + }) + public void removingAOneOfFieldFromVariationProducesValidationError(String subPath, String action, String subExpected) { + String pathPrefix = "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor"; + String path = pathPrefix.concat(subPath); + + String validationMessagePrefix = "\\$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.variation"; + String expectedValidationMessage = subExpected.replaceAll("HERE", validationMessagePrefix); + testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expectedValidationMessage); + } + /** * Absence of `term` leads to an {@link org.phenopackets.phenopackettools.validator.core.ValidationLevel#ERROR}. 
*/ diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json index f0228bbf..ca1fabdb 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json @@ -211,10 +211,12 @@ "acmgPathogenicityClassification": "PATHOGENIC", "therapeuticActionability": "ACTIONABLE", "variationDescriptor": { + "id": "example-cnv", + "moleculeContext": "genomic", "variation": { "copyNumber": { - "allele": { - "sequenceLocation": { + "derivedSequenceExpression": { + "location": { "sequenceId": "refseq:NC_000013.14", "sequenceInterval": { "startNumber": { From 800a82196bdc56aba0aa7729c090941434f62aab Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 16:14:38 -0500 Subject: [PATCH 66/79] Update expected test failure messages. 
Signed-off-by: Daniel Danis --- .../JsonSchemaValidationWorkflowRunnerTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index c5012653..6ade9cf6 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -226,8 +226,8 @@ public void checkVariantInterpretationConstraints(String path, String action, St @ParameterizedTest @CsvSource({ - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/id, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.id: is missing but it is required'", - "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/moleculeContext, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.moleculeContext: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/id, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.id' is missing but it is required", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/moleculeContext, DELETE, 'interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.moleculeContext' is missing but it is required", }) public 
void checkVariationDescriptorConstraints(String path, String action, String expected) { testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expected); @@ -242,14 +242,14 @@ public void checkVariationDescriptorConstraints(String path, String action, Stri */ @ParameterizedTest @CsvSource({ - "/variation/copyNumber, DELETE, 'HERE.allele: is missing but it is required|HERE.haplotype: is missing but it is required|HERE.copyNumber: is missing but it is required|HERE.text: is missing but it is required|HERE.variationSet: is missing but it is required'", + "/variation/copyNumber, DELETE, 'REPLACE.allele' is missing but it is required|'REPLACE.haplotype' is missing but it is required|'REPLACE.copyNumber' is missing but it is required|'REPLACE.text' is missing but it is required|'REPLACE.variationSet' is missing but it is required", }) public void removingAOneOfFieldFromVariationProducesValidationError(String subPath, String action, String subExpected) { String pathPrefix = "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor"; String path = pathPrefix.concat(subPath); - String validationMessagePrefix = "\\$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.variation"; - String expectedValidationMessage = subExpected.replaceAll("HERE", validationMessagePrefix); + String validationMessagePrefix = "interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.variation"; + String expectedValidationMessage = subExpected.replaceAll("REPLACE", validationMessagePrefix); testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expectedValidationMessage); } From 72e8a2e0096473daefa7546437086ebfb2a5b0ae Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 17:11:10 -0500 Subject: [PATCH 67/79] Add examples for `missing-fields` and `missing-resources`. 
Signed-off-by: Daniel Danis --- .../src/examples/phenopackets/README.md | 44 ++++--- .../phenopackets/missing-fields-valid.json | 108 ++++++++++++++++++ ...syntax-errors.json => missing-fields.json} | 0 .../phenopackets/missing-resources-valid.json | 43 +++++++ .../phenopackets/missing-resources.json | 36 ++++++ .../src/examples/phenopackets/semantic.json | 3 - 6 files changed, 215 insertions(+), 19 deletions(-) create mode 100644 phenopacket-tools-cli/src/examples/phenopackets/missing-fields-valid.json rename phenopacket-tools-cli/src/examples/phenopackets/{syntax-errors.json => missing-fields.json} (100%) create mode 100644 phenopacket-tools-cli/src/examples/phenopackets/missing-resources-valid.json create mode 100644 phenopacket-tools-cli/src/examples/phenopackets/missing-resources.json delete mode 100644 phenopacket-tools-cli/src/examples/phenopackets/semantic.json diff --git a/phenopacket-tools-cli/src/examples/phenopackets/README.md b/phenopacket-tools-cli/src/examples/phenopackets/README.md index 749f2c0a..a963d9d2 100644 --- a/phenopacket-tools-cli/src/examples/phenopackets/README.md +++ b/phenopacket-tools-cli/src/examples/phenopackets/README.md @@ -4,25 +4,37 @@ The folder contains a few phenopackets for demonstrating the validation function The validator will report the validation issues, one issue per line. The next sections show different types of validation errors that can be found using *phenopacket-tools*. -## `syntax-errors.json` +## `missing-fields.json` -The `syntax-errors.json` is a phenopacket where several required attributes are missing. -The *phenopacket-tools* validator will point out the following issues: +The `missing-fields.json` is a phenopacket where several required attributes are missing. Presence of all required +attributes is checked at the beginning of the validation, before any other checks. 
The *phenopacket-tools* validator +will point out the following issues: -| Path | Message | Solution | -|:-------------------------------------|:-------------------------------|:---------------------------------------------------------------------| -| `$.id` | Is missing but it is required. | Add phenopacket ID. | -| `$.subject.id` | Is missing but it is required. | Add subject ID. | -| `$.phenotypicFeatures[0].type.label` | Is missing but it is required. | Add the `label` attribute into `$.phenotypicFeatures[0].type.label`. | +| Message | Solution | +|:------------------------------------------------------------------|:---------------------------------------------------------------------| +| `id` is missing but it is required. | Add phenopacket ID. | +| `subject.id` is missing but it is required. | Add subject ID. | +| `phenotypicFeatures[0].type.label` is missing but it is required. | Add the `label` attribute into `phenotypicFeatures[0].type.label`. | -## `semantic-errors.json` +See `missing-fields-valid.json` for a valid version of the phenopacket: -The `semantic-errors.json` is a phenopacket with no syntax errors. However, there are several semantic inconsistencies: +```shell +# Use UNIX diff to highlight differences between two files +diff missing-fields.json missing-fields-valid.json +``` -| Path | Message | Solution | -|:------|:--------|:---------| -| | | | -| | | | +## `missing-resources.json` - -- `retinoblastoma.json` \ No newline at end of file +The `missing-resources.json` is a phenopacket with no missing fields, so it passes the syntax validation. +However, the phenopacket is invalid because it uses ontologies that are not defined in the `metaData.resource` section. 
+The validator will point out the following issues: + +| Message | Solution | +|:--------------------------------------------------|:-----------------------------------------------------------| +| No ontology corresponding to ID 'NCBITaxon:9606' | Add a `Resource` for `NCBITaxon` into `metadata.resources` | + +See `missing-resources-valid.json` for a valid version of the phenopacket: + +```shell +diff missing-resources.json missing-resources-valid.json +``` diff --git a/phenopacket-tools-cli/src/examples/phenopackets/missing-fields-valid.json b/phenopacket-tools-cli/src/examples/phenopackets/missing-fields-valid.json new file mode 100644 index 00000000..5445fede --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenopackets/missing-fields-valid.json @@ -0,0 +1,108 @@ +{ + "id": "missing-fields-valid-phenopacket-id", + "subject": { + "id": "example-subject-id", + "dateOfBirth": "1998-01-01T00:00:00Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P3Y" + } + }, + "sex": "MALE", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001159", + "label": "Syndactyly" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }, { + "type": { + "id": "HP:0002090", + "label": "Pneumonia" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }, { + "type": { + "id": "HP:0000028", + "label": "Cryptorchidism" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }, { + "type": { + "id": "HP:0011109", + "label": "Chronic sinusitis" + }, + "severity": { + "id": "HP:0012828", + "label": "Severe" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003581", + "label": "Adult onset" + } + } + }], + "files": [{ + "uri": "file://data/file.vcf.gz", + "individualToFileIdentifiers": { + "kindred 1A": "SAME000234" + }, + "fileAttributes": { + "genomeAssembly": 
"GRCh38", + "fileFormat": "vcf" + } + }], + "metaData": { + "created": "2021-07-01T19:32:35Z", + "createdBy": "HPO:probinson", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2020-07-13", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + } ], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [{ + "id": "PMID:20842687", + "description": "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter." 
+ }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenopackets/syntax-errors.json b/phenopacket-tools-cli/src/examples/phenopackets/missing-fields.json similarity index 100% rename from phenopacket-tools-cli/src/examples/phenopackets/syntax-errors.json rename to phenopacket-tools-cli/src/examples/phenopackets/missing-fields.json diff --git a/phenopacket-tools-cli/src/examples/phenopackets/missing-resources-valid.json b/phenopacket-tools-cli/src/examples/phenopackets/missing-resources-valid.json new file mode 100644 index 00000000..61e09904 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenopackets/missing-resources-valid.json @@ -0,0 +1,43 @@ +{ + "id": "missing-resources-example", + "subject": { + "id": "subject-id", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001250", + "label": "Seizure" + } + }], + "metaData": { + "created": "2021-07-01T19:32:35Z", + "createdBy": "HPO:probinson", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2020-07-13", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [ + { + "id": "PMID:20842687", + "description": "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter." 
+ } + ] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenopackets/missing-resources.json b/phenopacket-tools-cli/src/examples/phenopackets/missing-resources.json new file mode 100644 index 00000000..37851952 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenopackets/missing-resources.json @@ -0,0 +1,36 @@ +{ + "id": "missing-resources-example", + "subject": { + "id": "subject-id", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001250", + "label": "Seizure" + } + }], + "metaData": { + "created": "2021-07-01T19:32:35Z", + "createdBy": "HPO:probinson", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [ + { + "id": "PMID:20842687", + "description": "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter." + } + ] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenopackets/semantic.json b/phenopacket-tools-cli/src/examples/phenopackets/semantic.json deleted file mode 100644 index 0e0dcd23..00000000 --- a/phenopacket-tools-cli/src/examples/phenopackets/semantic.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - -} \ No newline at end of file From 8d5f288a033e5809faba2283703d71c6ec58707a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 17:20:08 -0500 Subject: [PATCH 68/79] Allow not writing the validation result header. 
Signed-off-by: Daniel Danis --- .../cli/command/ValidateCommand.java | 13 ++++++++++--- .../cli/writer/CSVValidationResultsWriter.java | 17 +++++++++++------ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java index f9e13cde..f976538f 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java @@ -43,11 +43,15 @@ public static class ValidateSection { @CommandLine.Option(names = {"--require"}, arity = "*", description = "Path to JSON schema with additional requirements to enforce.") - protected List requirements = List.of(); + public List requirements = List.of(); @CommandLine.Option(names = "--hpo", description = "Path to hp.json file") - protected Path hpJson; + public Path hpJson; + + @CommandLine.Option(names = {"-H", "--no-header"}, + description = {"Do not print validation header", "Default: ${DEFAULT-VALUE}"}) + public boolean noHeader = false; } @Override @@ -66,7 +70,10 @@ protected Integer execute() { // (4) Write out the validation results into STDOUT. 
try { - CSVValidationResultsWriter writer = new CSVValidationResultsWriter(System.out, PHENOPACKET_TOOLS_VERSION, LocalDateTime.now()); + CSVValidationResultsWriter writer = new CSVValidationResultsWriter(System.out, + PHENOPACKET_TOOLS_VERSION, + LocalDateTime.now(), + validateSection.noHeader); writer.writeValidationResults(runner.validators(), results); return 0; } catch (IOException e) { diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java index d408c35b..a3e233c1 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java @@ -27,18 +27,21 @@ public class CSVValidationResultsWriter implements ValidationResultsWriter { private final OutputStream os; private final String phenopacketToolsVersion; private final LocalDateTime dateTime; + private final boolean noHeader; /** * Create the writer using a given {@link OutputStream}. Note that the {@link OutputStream} is not closed. 
* * @param os where to write to * @param phenopacketToolsVersion phenopacket tools version - * @param dateTime + * @param dateTime the time of validation + * @param noHeader skip writing header */ - public CSVValidationResultsWriter(OutputStream os, String phenopacketToolsVersion, LocalDateTime dateTime) { + public CSVValidationResultsWriter(OutputStream os, String phenopacketToolsVersion, LocalDateTime dateTime, boolean noHeader) { this.os = os; this.phenopacketToolsVersion = phenopacketToolsVersion; this.dateTime = dateTime; + this.noHeader = noHeader; } @Override @@ -50,7 +53,9 @@ public void writeValidationResults(List validators, List results, CSVPrinter printer) throws for (ValidatorInfo validator : results) { printer.printComment("validator_id=%s;validator_name=%s;description=%s".formatted(validator.validatorId(), validator.validatorName(), validator.description())); } + + // Print column names + printer.printRecord("PATH", "LEVEL", "VALIDATOR_ID", "CATEGORY", "MESSAGE"); } private static void printValidationResults(List results, CSVPrinter printer) throws IOException { - // Header - printer.printRecord("PATH", "LEVEL", "VALIDATOR_ID", "CATEGORY", "MESSAGE"); - // Validation results for (ValidationResultsAndPath rp : results) { String path = rp.path() == null ? "-" : rp.path().toAbsolutePath().toString(); for (ValidationResult result : rp.results().validationResults()) { From 02e9d6e7e151431db2e3d1fb513debf1917ab656 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 17:49:11 -0500 Subject: [PATCH 69/79] Use `$id` instead of `$schema` for custom JSON schema validators. 
Signed-off-by: Daniel Danis --- .../jsonschema/v2/JsonSchemaValidatorConfigurer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java index a1f866d6..30d3d520 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/v2/JsonSchemaValidatorConfigurer.java @@ -151,11 +151,11 @@ private static JsonSchemaNodeAndInfo readSchemaAndInfo(InputStream is) throws IO } private static ValidatorInfo decodeValidatorInfo(JsonNode schemaNode) { - String schema = getNodeAsTextOrDefaultText(schemaNode, "$schema", "UNKNOWN_SCHEMA"); + String id = getNodeAsTextOrDefaultText(schemaNode, "$id", "UNKNOWN_SCHEMA"); String title = getNodeAsTextOrDefaultText(schemaNode, "title", "UNKNOWN_TITLE"); String description = getNodeAsTextOrDefaultText(schemaNode, "description", "UNKNOWN VALIDATOR"); - return ValidatorInfo.of(schema, title, description); + return ValidatorInfo.of(id, title, description); } private static String getNodeAsTextOrDefaultText(JsonNode schemaNode, String fieldName, String defaultValue) { From 1560395cd11ca10f9e3cd933734f340ced70af0d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 17:51:23 -0500 Subject: [PATCH 70/79] Add example for using a custom JSON schema. 
Signed-off-by: Daniel Danis --- .../hpo-rare-disease-schema.json | 21 ++++++++----- .../marfan.no-phenotype.invalid.json | 24 +++++++++++++++ ...fan.no-time-at-last-encounter.invalid.json | 25 ++++++++++++++++ .../marfan.not-hpo.invalid.json | 30 +++++++++++++++++++ .../custom-json-schema/marfan.valid.json | 30 +++++++++++++++++++ 5 files changed, 122 insertions(+), 8 deletions(-) rename phenopacket-tools-cli/src/examples/{schemas => custom-json-schema}/hpo-rare-disease-schema.json (51%) create mode 100644 phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-phenotype.invalid.json create mode 100644 phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json create mode 100644 phenopacket-tools-cli/src/examples/custom-json-schema/marfan.not-hpo.invalid.json create mode 100644 phenopacket-tools-cli/src/examples/custom-json-schema/marfan.valid.json diff --git a/phenopacket-tools-cli/src/examples/schemas/hpo-rare-disease-schema.json b/phenopacket-tools-cli/src/examples/custom-json-schema/hpo-rare-disease-schema.json similarity index 51% rename from phenopacket-tools-cli/src/examples/schemas/hpo-rare-disease-schema.json rename to phenopacket-tools-cli/src/examples/custom-json-schema/hpo-rare-disease-schema.json index d56a28fd..7ba15846 100644 --- a/phenopacket-tools-cli/src/examples/schemas/hpo-rare-disease-schema.json +++ b/phenopacket-tools-cli/src/examples/custom-json-schema/hpo-rare-disease-schema.json @@ -1,9 +1,8 @@ { - "$schema": "https://json-schema.org/draft/2019-09/schema#", - "$id": "example.hpo.jsonschema.validator", + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://example.com/hpo-rare-disease-validator", "title": "HPO Rare Disease Phenopacket Schema", - "description": "HPO Rare Disease Schema for GA4GH Phenopacket", - "_comment": "Here we require the phenopacket to have the following elements that are not required by the default schema 1. 
subject (proband being investigated) 2. at least one phenotypicFeature element 3. time_at_last encounter (subelement of subject), representing the age of the proband. In addition, we require that Human Phenotype Ontology (HPO) terms are used to represent phenotypicFeature", + "description": "An example JSON schema for validating a phenopacket in context of the rare-disease research", "type": "object", "properties": { "subject": { @@ -25,13 +24,19 @@ { "type": "object", "properties": { - "id": { - "type": "string", - "pattern": "^HP:\\([0-9]{7}$" + "type": { + "type": "object", + "properties": { + "id": { + "type": "string", + "pattern": "^HP:\\d{7}$" + } + } } } } - ] + ], + "minItems": 1 } }, "required": [ diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-phenotype.invalid.json b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-phenotype.invalid.json new file mode 100644 index 00000000..095b1dc8 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-phenotype.invalid.json @@ -0,0 +1,24 @@ +{ + "id": "id-C", + "subject": { + "id": "proband C", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P27Y" + } + } + }, + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json new file mode 100644 index 00000000..65b335d8 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json @@ -0,0 
+1,25 @@ +{ + "id": "id-C", + "subject": { + "id": "proband C" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.not-hpo.invalid.json b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.not-hpo.invalid.json new file mode 100644 index 00000000..3281c3f4 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.not-hpo.invalid.json @@ -0,0 +1,30 @@ +{ + "id": "id-C", + "subject": { + "id": "proband C", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P27Y" + } + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "NCIT:C26697", + "label": "Aortic Aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "21.05d", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.valid.json b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.valid.json new file mode 100644 index 00000000..f918d3bf --- /dev/null +++ b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.valid.json @@ -0,0 +1,30 @@ +{ + "id": "id-C", + "subject": { + "id": "proband C", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P27Y" + } + } + }, + 
"phenotypicFeatures": [{ + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file From 2b8f0b312b41820a3f703fe43308a720000b329e Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 9 Nov 2022 22:33:28 -0500 Subject: [PATCH 71/79] Add examples for HPO validation, improve HPO validation message wording. Signed-off-by: Daniel Danis --- .../examples/{phenopackets => base}/README.md | 4 ++- .../missing-fields-valid.json | 0 .../missing-fields.json | 0 .../missing-resources-valid.json | 0 .../missing-resources.json | 0 ...n.annotation-propagation-rule.invalid.json | 27 ++++++++++++++++++ .../marfan.obsolete-term.invalid.json | 22 +++++++++++++++ .../phenotype-validation/marfan.valid.json | 28 +++++++++++++++++++ .../AbstractHpoAncestryValidator.java | 18 ++++++++++++ .../core/phenotype/base/BaseHpoValidator.java | 20 +++++++++++++ .../AbstractHpoPhenotypeValidator.java | 14 ++++++---- .../primary/CohortHpoPhenotypeValidator.java | 4 +-- .../primary/FamilyHpoPhenotypeValidator.java | 7 ++--- .../PhenopacketHpoPhenotypeValidator.java | 4 +-- .../PrimaryHpoPhenotypeValidatorTest.java | 12 ++++---- 15 files changed, 137 insertions(+), 23 deletions(-) rename phenopacket-tools-cli/src/examples/{phenopackets => base}/README.md (93%) rename phenopacket-tools-cli/src/examples/{phenopackets => base}/missing-fields-valid.json (100%) rename phenopacket-tools-cli/src/examples/{phenopackets => base}/missing-fields.json (100%) rename phenopacket-tools-cli/src/examples/{phenopackets => base}/missing-resources-valid.json (100%) rename 
phenopacket-tools-cli/src/examples/{phenopackets => base}/missing-resources.json (100%) create mode 100644 phenopacket-tools-cli/src/examples/phenotype-validation/marfan.annotation-propagation-rule.invalid.json create mode 100644 phenopacket-tools-cli/src/examples/phenotype-validation/marfan.obsolete-term.invalid.json create mode 100644 phenopacket-tools-cli/src/examples/phenotype-validation/marfan.valid.json diff --git a/phenopacket-tools-cli/src/examples/phenopackets/README.md b/phenopacket-tools-cli/src/examples/base/README.md similarity index 93% rename from phenopacket-tools-cli/src/examples/phenopackets/README.md rename to phenopacket-tools-cli/src/examples/base/README.md index a963d9d2..a6928e46 100644 --- a/phenopacket-tools-cli/src/examples/phenopackets/README.md +++ b/phenopacket-tools-cli/src/examples/base/README.md @@ -1,6 +1,8 @@ # README -The folder contains a few phenopackets for demonstrating the validation functionality of *phenopacket-tools*. +The folder contains a few phenopackets for demonstrating the base validation functionality of *phenopacket-tools*; +the validation that any phenopacket must pass. + The validator will report the validation issues, one issue per line. The next sections show different types of validation errors that can be found using *phenopacket-tools*. 
diff --git a/phenopacket-tools-cli/src/examples/phenopackets/missing-fields-valid.json b/phenopacket-tools-cli/src/examples/base/missing-fields-valid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/phenopackets/missing-fields-valid.json rename to phenopacket-tools-cli/src/examples/base/missing-fields-valid.json diff --git a/phenopacket-tools-cli/src/examples/phenopackets/missing-fields.json b/phenopacket-tools-cli/src/examples/base/missing-fields.json similarity index 100% rename from phenopacket-tools-cli/src/examples/phenopackets/missing-fields.json rename to phenopacket-tools-cli/src/examples/base/missing-fields.json diff --git a/phenopacket-tools-cli/src/examples/phenopackets/missing-resources-valid.json b/phenopacket-tools-cli/src/examples/base/missing-resources-valid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/phenopackets/missing-resources-valid.json rename to phenopacket-tools-cli/src/examples/base/missing-resources-valid.json diff --git a/phenopacket-tools-cli/src/examples/phenopackets/missing-resources.json b/phenopacket-tools-cli/src/examples/base/missing-resources.json similarity index 100% rename from phenopacket-tools-cli/src/examples/phenopackets/missing-resources.json rename to phenopacket-tools-cli/src/examples/base/missing-resources.json diff --git a/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.annotation-propagation-rule.invalid.json b/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.annotation-propagation-rule.invalid.json new file mode 100644 index 00000000..3d3004a3 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.annotation-propagation-rule.invalid.json @@ -0,0 +1,27 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0004942", + "label": "Aortic aneurysm" + } + }, { + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + 
"createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.obsolete-term.invalid.json b/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.obsolete-term.invalid.json new file mode 100644 index 00000000..000231e6 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.obsolete-term.invalid.json @@ -0,0 +1,22 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002631", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.valid.json b/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.valid.json new file mode 100644 index 00000000..fc712eae --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.valid.json @@ -0,0 +1,28 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0004942", + "label": "Aortic aneurysm" + } + }, { + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + }, + "excluded": true + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": 
"2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java index bd41dc84..4da56932 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/ancestry/AbstractHpoAncestryValidator.java @@ -12,6 +12,8 @@ import org.phenopackets.phenopackettools.validator.core.phenotype.util.Util; import org.phenopackets.schema.v2.PhenopacketOrBuilder; import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.*; import java.util.stream.Stream; @@ -30,6 +32,8 @@ */ public abstract class AbstractHpoAncestryValidator extends BaseHpoValidator { + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractHpoAncestryValidator.class); + private static final ValidatorInfo VALIDATOR_INFO = ValidatorInfo.of( "HpoAncestryValidator", "HPO ancestry phenotypic feature validator", @@ -63,6 +67,11 @@ private Stream validatePhenopacketPhenotypicFeatures(String id // Check that the component does not contain both observed term and its ancestor. 
for (TermId observed : featuresByExclusion.observedPhenotypicFeatures()) { + if (isObsoleteTermId(observed)) { + LOGGER.debug("Ignoring unknown/obsolete term ID {}", observed.getValue()); + continue; + } + for (TermId ancestor : OntologyAlgorithm.getAncestorTerms(hpo, observed, false)) { if (featuresByExclusion.observedPhenotypicFeatures().contains(ancestor)) results.add(constructResultForAnObservedTerm(id, observed, ancestor, false)); @@ -73,6 +82,11 @@ private Stream validatePhenopacketPhenotypicFeatures(String id // Check that the component does not have negated descendant for (TermId excluded : featuresByExclusion.excludedPhenotypicFeatures()) { + if (isObsoleteTermId(excluded)) { + LOGGER.debug("Ignoring unknown/obsolete term ID {}", excluded.getValue()); + continue; + } + for (TermId child : OntologyAlgorithm.getDescendents(hpo, excluded)) { if (child.equals(excluded)) // skip the parent term @@ -85,6 +99,10 @@ private Stream validatePhenopacketPhenotypicFeatures(String id return results.build(); } + private boolean isObsoleteTermId(TermId termId) { + return hpo.getObsoleteTermIds().contains(termId); + } + private ValidationResult constructResultForAnObservedTerm(String id, TermId observedId, TermId ancestorId, boolean ancestorIsExcluded) { Term observedTerm = hpo.getTermMap().get(observedId); String observedTermName = observedTerm == null ? 
UNKNOWN : observedTerm.getName(); diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java index a908efb6..6e824235 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/base/BaseHpoValidator.java @@ -3,6 +3,7 @@ import com.google.protobuf.MessageOrBuilder; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.PhenopacketValidator; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; import java.util.Objects; @@ -16,4 +17,23 @@ protected BaseHpoValidator(Ontology hpo) { // TODO - can be replaced by this.hpo.version() in the most recent phenol versions. this.hpoVersion = this.hpo.getMetaInfo().getOrDefault("data-version", "HPO"); } + + protected static String summarizePhenopacketAndIndividualId(PhenopacketOrBuilder phenopacket) { + // Build a string like / but only if one/other are present. 
+ StringBuilder builder = new StringBuilder(); + String phenopacketId = phenopacket.getId(); + String individualId = phenopacket.getSubject().getId(); + if (!phenopacketId.isBlank() || !individualId.isBlank()) { + builder.append(" in "); + if (!phenopacketId.isBlank()) + builder.append(phenopacketId); + + if (!individualId.isBlank()) { + if (!phenopacketId.isBlank()) + builder.append("/"); + builder.append(individualId); + } + } + return builder.toString(); + } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java index 141dc0ef..d75cfc7f 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/AbstractHpoPhenotypeValidator.java @@ -6,6 +6,7 @@ import org.monarchinitiative.phenol.ontology.data.TermId; import org.phenopackets.phenopackettools.validator.core.*; import org.phenopackets.phenopackettools.validator.core.phenotype.base.BaseHpoValidator; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; import org.phenopackets.schema.v2.core.PhenotypicFeature; import java.util.stream.Stream; @@ -28,13 +29,14 @@ public ValidatorInfo validatorInfo() { return VALIDATOR_INFO; } - protected Stream checkPhenotypeFeature(String individualId, PhenotypicFeature feature) { + protected Stream checkPhenotypeFeature(PhenopacketOrBuilder phenopacket, PhenotypicFeature feature) { TermId termId; try { termId = TermId.of(feature.getType().getId()); } catch (PhenolRuntimeException e) { + String idSummary = summarizePhenopacketAndIndividualId(phenopacket); // Should not really happen if JsonSchema validators 
are run upstream, but let's stay safe. - String msg = "The %s found in '%s' is not a valid value".formatted(feature.getType().getId(), individualId); + String msg = "The %s found%s is not a valid term ID".formatted(feature.getType().getId(), idSummary); return Stream.of( ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) ); @@ -42,7 +44,8 @@ protected Stream checkPhenotypeFeature(String indivi if (termId.getPrefix().equals("HP")) { // Check if the HPO contains the term. if (!hpo.containsTerm(termId)) { - String msg = "%s in '%s' not found in %s".formatted(termId.getValue(), individualId, hpoVersion); + String idSummary = summarizePhenopacketAndIndividualId(phenopacket); + String msg = "%s%s not found in %s".formatted(termId.getValue(), idSummary, hpoVersion); return Stream.of( ValidationResult.error(VALIDATOR_INFO, INVALID_TERM_ID, msg) ); @@ -51,8 +54,9 @@ protected Stream checkPhenotypeFeature(String indivi // Check if the `termId` is a primary ID. // If not, this is a warning. 
TermId primaryId = hpo.getPrimaryTermId(termId); if (!primaryId.equals(termId)) { - String msg = "Using obsoleted id (%s) instead of current primary id (%s) in '%s'" - .formatted(termId.getValue(), primaryId.getValue(), individualId); + String idSummary = summarizePhenopacketAndIndividualId(phenopacket); + String msg = "Using obsolete id (%s) instead of current primary id (%s)%s".formatted( + termId.getValue(), primaryId.getValue(), idSummary); return Stream.of( ValidationResult.warning(VALIDATOR_INFO, OBSOLETED_TERM_ID, msg) ); diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java index c396a0c1..0642f21e 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/CohortHpoPhenotypeValidator.java @@ -4,7 +4,6 @@ import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.schema.v2.CohortOrBuilder; import org.phenopackets.schema.v2.Phenopacket; -import org.phenopackets.schema.v2.core.Individual; import org.phenopackets.schema.v2.core.PhenotypicFeature; import java.util.ArrayList; @@ -21,9 +20,8 @@ public List validate(CohortOrBuilder component) { List results = new ArrayList<>(); for (Phenopacket member : component.getMembersList()) { - Individual subject = member.getSubject(); for (PhenotypicFeature feature : member.getPhenotypicFeaturesList()) { - checkPhenotypeFeature(subject.getId(), feature) + checkPhenotypeFeature(member, feature) .forEach(results::add); } } diff --git 
a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java index ad65f9b2..65beef4e 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/FamilyHpoPhenotypeValidator.java @@ -4,7 +4,6 @@ import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.schema.v2.FamilyOrBuilder; import org.phenopackets.schema.v2.Phenopacket; -import org.phenopackets.schema.v2.core.Individual; import org.phenopackets.schema.v2.core.PhenotypicFeature; import java.util.ArrayList; @@ -23,18 +22,16 @@ public List validate(FamilyOrBuilder component) { // First check the proband. { Phenopacket proband = component.getProband(); - Individual subject = proband.getSubject(); for (PhenotypicFeature feature : proband.getPhenotypicFeaturesList()) { - checkPhenotypeFeature(subject.getId(), feature) + checkPhenotypeFeature(proband, feature) .forEach(results::add); } } // Then the relatives. 
for (Phenopacket relative : component.getRelativesList()) { - Individual subject = relative.getSubject(); for (PhenotypicFeature feature : relative.getPhenotypicFeaturesList()) { - checkPhenotypeFeature(subject.getId(), feature) + checkPhenotypeFeature(relative, feature) .forEach(results::add); } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java index 96fda6c6..7b43a10e 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/primary/PhenopacketHpoPhenotypeValidator.java @@ -3,7 +3,6 @@ import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.ValidationResult; import org.phenopackets.schema.v2.PhenopacketOrBuilder; -import org.phenopackets.schema.v2.core.Individual; import org.phenopackets.schema.v2.core.PhenotypicFeature; import java.util.ArrayList; @@ -19,9 +18,8 @@ public PhenopacketHpoPhenotypeValidator(Ontology hpo) { public List validate(PhenopacketOrBuilder component) { List results = new ArrayList<>(); - Individual subject = component.getSubject(); for (PhenotypicFeature feature : component.getPhenotypicFeaturesList()) { - checkPhenotypeFeature(subject.getId(), feature) + checkPhenotypeFeature(component, feature) .forEach(results::add); } diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java 
b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java index a9ba8567..d85048d4 100644 --- a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/PrimaryHpoPhenotypeValidatorTest.java @@ -85,7 +85,7 @@ public void testMissingTermId() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("HP:0001182 in 'proband A' not found in http://purl.obolibrary.org/obo/hp/releases/2021-06-08/hp.json")); + assertThat(result.message(), equalTo("HP:0001182 in proband A not found in http://purl.obolibrary.org/obo/hp/releases/2021-06-08/hp.json")); } @Test @@ -116,7 +116,7 @@ public void testObsoleteTermId() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.WARNING)); assertThat(result.category(), equalTo("Obsoleted TermId")); - assertThat(result.message(), equalTo("Using obsoleted id (HP:0001505) instead of current primary id (HP:0001166) in 'proband A'")); + assertThat(result.message(), equalTo("Using obsolete id (HP:0001505) instead of current primary id (HP:0001166) in proband A")); } @Test @@ -142,7 +142,7 @@ public void testMistypedTermId() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("The HP_0100807 found in 'proband A' is not a valid value")); + assertThat(result.message(), equalTo("The HP_0100807 found in proband A is not a valid term ID")); } } @@ -247,7 +247,7 @@ public void 
testInvalidIdInProband() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("The HP_0001238 found in 'Flynn' is not a valid value")); + assertThat(result.message(), equalTo("The HP_0001238 found in Flynn is not a valid term ID")); } @Test @@ -295,7 +295,7 @@ public void testInvalidIdInRelative() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("The HP_0001238 found in 'Walt' is not a valid value")); + assertThat(result.message(), equalTo("The HP_0001238 found in Walt is not a valid term ID")); } } @@ -383,7 +383,7 @@ public void testInvalidId() throws Exception { ValidationResult result = results.get(0); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Invalid TermId")); - assertThat(result.message(), equalTo("The HP_0001238 found in 'Thing 1' is not a valid value")); + assertThat(result.message(), equalTo("The HP_0001238 found in Thing 1 is not a valid term ID")); } } From bd9b0caab8dd7623aa7c9e1afb37cb0f8ba1e2f8 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 10 Nov 2022 10:18:28 -0500 Subject: [PATCH 72/79] Add organ system validator, do not print validation header by default. 
Signed-off-by: Daniel Danis --- .../cli/command/ValidateCommand.java | 89 +++++++++++++++++-- .../writer/CSVValidationResultsWriter.java | 11 +-- .../orgsys/AbstractOrganSystemValidator.java | 14 --- 3 files changed, 88 insertions(+), 26 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java index f976538f..ce2a34cf 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/command/ValidateCommand.java @@ -2,8 +2,10 @@ import com.google.protobuf.MessageOrBuilder; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; import org.phenopackets.phenopackettools.core.PhenopacketElement; import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; import org.phenopackets.phenopackettools.validator.core.*; @@ -27,6 +29,9 @@ import java.time.LocalDateTime; import java.util.ArrayList; import java.util.List; +import java.util.Optional; +import java.util.function.Function; +import java.util.stream.Collectors; @Command(name = "validate", description = "Validate top-level elements of the Phenopacket schema.", @@ -40,6 +45,10 @@ public class ValidateCommand extends BaseIOCommand { public ValidateSection validateSection = new ValidateSection(); public static class ValidateSection { + @CommandLine.Option(names = {"-H", "--include-header"}, + description = {"Include header in the output", "Default: ${DEFAULT-VALUE}"}) + public boolean includeHeader = false; + @CommandLine.Option(names = {"--require"}, arity = "*", description = "Path to JSON schema with additional requirements to 
enforce.") @@ -49,9 +58,11 @@ public static class ValidateSection { description = "Path to hp.json file") public Path hpJson; - @CommandLine.Option(names = {"-H", "--no-header"}, - description = {"Do not print validation header", "Default: ${DEFAULT-VALUE}"}) - public boolean noHeader = false; + @CommandLine.Option(names = {"-s", "--organ-system"}, + arity = "*", + description = {"Organ system HPO term IDs", + "Default: empty"}) + public List organSystems = List.of(); } @Override @@ -73,7 +84,7 @@ protected Integer execute() { CSVValidationResultsWriter writer = new CSVValidationResultsWriter(System.out, PHENOPACKET_TOOLS_VERSION, LocalDateTime.now(), - validateSection.noHeader); + validateSection.includeHeader); writer.writeValidationResults(runner.validators(), results); return 0; } catch (IOException e) { @@ -144,9 +155,10 @@ private List> configureSema // Right now we only have one semantic validator, but we'll extend this in the future. LOGGER.debug("Configuring semantic validators"); List> validators = new ArrayList<>(); + Ontology hpo = null; if (validateSection.hpJson != null) { - LOGGER.debug("Reading HPO from '{}}'", validateSection.hpJson.toAbsolutePath()); - Ontology hpo = OntologyLoader.loadOntology(validateSection.hpJson.toFile()); + LOGGER.debug("Reading HPO from {}", validateSection.hpJson.toAbsolutePath()); + hpo = OntologyLoader.loadOntology(validateSection.hpJson.toFile()); // The entire logic of this command stands and falls on correct state of `element` and the read message(s). // This method requires an appropriate combination of `T` and `element`, as described in Javadoc. 
@@ -171,11 +183,74 @@ private List> configureSema //noinspection unchecked validators.add((PhenopacketValidator) HpoPhenotypeValidators.Ancestry.cohortHpoAncestryValidator(hpo)); } - }; + } + } + + if (!validateSection.organSystems.isEmpty()) { + PhenopacketValidator validator = prepareOrganSystemValidator(hpo, validateSection.organSystems, inputSection.element); + if (validator != null) + validators.add(validator); + } LOGGER.debug("Configured {} semantic validator(s)", validators.size()); return validators; } + private static PhenopacketValidator prepareOrganSystemValidator(Ontology hpo, + List organSystems, + PhenopacketElement element) { + // Organ system validation can only be done when HPO is provided. + if (hpo == null) { + LOGGER.warn("Terms for organ system validation were provided but the path to HPO is unset. Use --hpo option to enable organ system validation."); + return null; + } + + // Prepare organ system IDs. + List organSystemIds = prepareOrganSystemIds(organSystems); + + // Create the validator. 
+ if (!organSystemIds.isEmpty()) { + return switch (element) { + case PHENOPACKET -> //noinspection unchecked + (PhenopacketValidator) HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(hpo, organSystemIds); + case FAMILY -> //noinspection unchecked + (PhenopacketValidator) HpoPhenotypeValidators.OrganSystem.familyHpoOrganSystemValidator(hpo, organSystemIds); + case COHORT -> //noinspection unchecked + (PhenopacketValidator) HpoPhenotypeValidators.OrganSystem.cohortHpoOrganSystemValidator(hpo, organSystemIds); + }; + } + + return null; + } + + private static List prepareOrganSystemIds(List organSystems) { + LOGGER.trace("Found {} organ system IDs: {}", organSystems.size(), organSystems.stream() + .collect(Collectors.joining(", ", "{", "}"))); + List organSystemIds = organSystems.stream() + .map(toTermId()) + .flatMap(Optional::stream) + .toList(); + LOGGER.trace("{} organ system IDs are valid term IDs: {}", organSystemIds.size(), + organSystemIds.stream() + .map(TermId::getValue) + .collect(Collectors.joining(", ", "{", "}"))); + return organSystemIds; + } + + /** + * @return a function that maps a {@link String} into a {@link TermId} or emits a warning if the value + * cannot be mapped. 
+ */ + private static Function> toTermId() { + return value -> { + try { + return Optional.of(TermId.of(value)); + } catch (PhenolRuntimeException e) { + LOGGER.warn("Invalid term ID {}", value); + return Optional.empty(); + } + }; + } + } \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java index a3e233c1..8676dacc 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/writer/CSVValidationResultsWriter.java @@ -27,7 +27,7 @@ public class CSVValidationResultsWriter implements ValidationResultsWriter { private final OutputStream os; private final String phenopacketToolsVersion; private final LocalDateTime dateTime; - private final boolean noHeader; + private final boolean printHeader; /** * Create the writer using a given {@link OutputStream}. Note that the {@link OutputStream} is not closed. 
@@ -35,13 +35,13 @@ public class CSVValidationResultsWriter implements ValidationResultsWriter { * @param os where to write to * @param phenopacketToolsVersion phenopacket tools version * @param dateTime the time of validation - * @param noHeader skip writing header + * @param printHeader print header into the output */ - public CSVValidationResultsWriter(OutputStream os, String phenopacketToolsVersion, LocalDateTime dateTime, boolean noHeader) { + public CSVValidationResultsWriter(OutputStream os, String phenopacketToolsVersion, LocalDateTime dateTime, boolean printHeader) { this.os = os; this.phenopacketToolsVersion = phenopacketToolsVersion; this.dateTime = dateTime; - this.noHeader = noHeader; + this.printHeader = printHeader; } @Override @@ -53,7 +53,8 @@ public void writeValidationResults(List validators, List checkPhenotypicFeatures(String individualId, Li return results.build(); } - - /** - * @return a function that maps {@link OntologyClass} into a {@link TermId} and emit warning otherwise. - */ - private static Function> toTermId(String individualId) { - return oc -> { - try { - return Optional.of(TermId.of(oc.getId())); - } catch (PhenolRuntimeException e) { - LOGGER.warn("Invalid term ID {} in individual {}", oc.getId(), individualId); - return Optional.empty(); - } - }; - } } From 74cc41281a16caa67bb0a68fa4ea0d247cd9594b Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 10 Nov 2022 16:35:01 -0500 Subject: [PATCH 73/79] Add tutorial, improve the documentation. 
Signed-off-by: Daniel Danis --- docs/cli.rst | 5 +- docs/conf.py | 13 +- docs/converting.rst | 25 +- docs/examples.rst | 2 +- docs/index.rst | 1 + docs/tutorial.rst | 256 +++++++++++ docs/validation.rst | 13 +- phenopacket-tools-cli/pom.xml | 69 +-- .../src/assemble/distribution.xml | 7 +- .../convert/Schreckenbach-2014-TPM3-II.2.json | 396 ++++++++++++++++++ .../marfan.no-subject.invalid.json | 22 + .../examples/organ-systems/marfan.valid.json | 28 ++ .../validator/core/ValidatorInfoDefault.java | 4 +- 13 files changed, 781 insertions(+), 60 deletions(-) create mode 100644 docs/tutorial.rst create mode 100644 phenopacket-tools-cli/src/examples/convert/Schreckenbach-2014-TPM3-II.2.json create mode 100644 phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-subject.invalid.json create mode 100644 phenopacket-tools-cli/src/examples/organ-systems/marfan.valid.json diff --git a/docs/cli.rst b/docs/cli.rst index 898cb137..a1a4ce42 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -179,11 +179,10 @@ Results are written into STDOUT in CSV/TSV format. The CSV output has a header, The header contains phenopacket-tools version, date time of validation, and list of validators that were run. A row with column names follows the header, and then the individual validation results. -.. - TODO - check the validation description. +.. TODO - check the validation description. Set up autocompletion ~~~~~~~~~~~~~~~~~~~~~ -TODO - write the section +.. TODO - write the section diff --git a/docs/conf.py b/docs/conf.py index 7c34361a..866a9c32 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,8 +23,17 @@ ############## project = 'phenopacket-tools' -copyright = '2022, Peter Robinson' -author = 'Peter Robinson' +copyright = '2022, Daniel Danis, Peter Robinson' +author = u'Daniel Danis, Peter Robinson' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. 
+# +# The short X.Y version. +version = '0.4' +# The full version, including alpha/beta/rc tags. +release = '0.4.7-SNAPSHOT' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/converting.rst b/docs/converting.rst index 39183a82..ff9191c7 100644 --- a/docs/converting.rst +++ b/docs/converting.rst @@ -1,4 +1,4 @@ -.. _rstcoverting: +.. _rstconverting: ========================== @@ -6,7 +6,6 @@ Converting V1 Phenopackets ========================== - The Phenopacket is a Global Alliance for Genomics and Health (GA4GH) standard for sharing disease and phenotype information. To build the standard, requirements and specifications were established through a community effort. Version 1 of the GA4GH standard was released in 2019 to elicit feedback from the community. @@ -14,14 +13,17 @@ Version 2 was developed on the basis of this feedback and should be used hencefo the fields used for reporting phenotype ontology terms are nearly identical and version 1 can easily be converted to version 2. Version 1 had fields for reporting variants but did not specify how the variants related to disease diagnoses that were reported. -The conversion methods provided by the phenopacket-tools library only convert the list of phenotype ontology terms by default. Optionally, -variants can be converted under the assumption that only one disease was specified in the ``diseases`` field of the version 1 phenopacket and that -the reported variants are interpreted to be causal for the disease. If this is not the case, then users would need to write new code to perform +The conversion methods provided by the *phenopacket-tools* library do not convert the variants by default.
+The variants can be converted under the assumption that only one disease was specified in the ``diseases`` field +of the version 1 phenopacket and that the reported variants are interpreted to be causal for the disease. +If this is not the case, then users would need to write new code to perform the conversion according to the logic of their application. To use library code for converting a phenopacket, adapt the following. +.. TODO - point to JavaDoc + .. code-block:: java boolean convertVariants = true; // or false, as desired @@ -29,11 +31,14 @@ To use library code for converting a phenopacket, adapt the following. Phenopacket v2 = converter.convertPhenopacket(v1Phenopacket); -Alternatively, use the ``ConvertCommand`` in the phenopacket-tools-cli module to perform conversion. Both of the following -commands print output to the shell (optionally use the ``-o filename`` option to write to an outfile. +Alternatively, use the ``convert`` command of the command-line interface. +Both of the following commands print output to the standard output. .. code-block:: bash - alias pfx="java -jar phenopacket-tools-cli/target/phenopacket-tools-cli-0.4.6-SNAPSHOT.jar" - pfx /path/to/v1phenopacket.json - pdf /path/to/v1phenopacket.json --convert-variants \ No newline at end of file + pxf convert -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json + pxf convert --convert-variants -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json + +.. note:: + The commands above assume ``pxf`` is an alias and ``${examples}`` points to the location of the examples folder, + both set up in :ref:`rsttutorial`.
diff --git a/docs/examples.rst b/docs/examples.rst index 262d7079..cef98ed9 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -8,4 +8,4 @@ Example YAML files can be viewed for the following: * `Phenopackets `_ * `Familes `_ -* `Cohorts `_ \ No newline at end of file +* `Cohorts `_ diff --git a/docs/index.rst b/docs/index.rst index c158c4e3..d83e40aa 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -35,6 +35,7 @@ how to use the CLI application on your system. :maxdepth: 1 :caption: Contents: + tutorial creating validation converting diff --git a/docs/tutorial.rst b/docs/tutorial.rst new file mode 100644 index 00000000..f97cc39d --- /dev/null +++ b/docs/tutorial.rst @@ -0,0 +1,256 @@ +.. _rsttutorial: + +======== +Tutorial +======== + +This tutorial walks through the installation of *phenopacket-tools* and provides an overview +of the command-line interface functionality. + +Setup +===== + +*Phenopacket-tools* is distributed as a ZIP archive that contains an executable JAR file +and several resource files for running this tutorial. Let's check that Java is installed on the machine, +download the distribution ZIP and set up an alias as a shortcut for running the *phenopacket-tools*. + +Prerequisites +^^^^^^^^^^^^^ + +*Phenopacket-tools* is written in Java 17 and requires Java 17 or better to run. An appropriate Java executable +must be present on your ``$PATH``. Run the following to determine the availability and version of Java on your machine:: + + java -version + +which prints a similar output for Java 17:: + + openjdk version "17" 2021-09-14 + OpenJDK Runtime Environment (build 17+35-2724) + OpenJDK 64-Bit Server VM (build 17+35-2724, mixed mode, sharing) + +Download *phenopacket-tools* +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A prebuilt distribution ZIP file is available for download from +`phenopacket-tools release section `_ +of the GitHub repository. + +Download and unpack the ZIP file from the releases section: + +.. 
parsed-literal:: + + URL=https://github.com/phenopackets/phenopacket-tools/releases/download/v\ |release|\ /phenopacket-tools-cli-|release|-distribution.zip + curl -o phenopacket-tools-cli-|release|-distribution.zip ${URL} + unzip phenopacket-tools-cli-|release|-distribution.zip + +Set up alias +^^^^^^^^^^^^ + +In general, Java command line applications are invoked as ``java -jar executable.jar``. However, this is just +too verbose and we can shorten the command by defining an alias. + +Let's define an alias for *phenopacket-tools*. Assuming the distribution ZIP was unpacked into +phenopacket-tools-cli-|release| directory, run the following to set up the alias: + +.. parsed-literal:: + alias pxf="java -jar $(pwd)/phenopacket-tools-cli-\ |release|\ /phenopacket-tools-cli-|release|.jar" + +Now, let's check that the alias works by printing the help message: + +.. parsed-literal:: + pxf --help + +Convert +======= + +Version 1 of the GA4GH Phenopacket schema was released in 2019 to elicit community feedback. +In response to this feedback, the schema was extended and refined and version 2 was released in 2021 +and published in 2022 by the International Standards Organization (ISO). + +The `convert` command of *phenopacket-tools* converts version 1 phenopackets into version 2. In this tutorial, +we will convert 384 v1 phenopackets published by Robinson et al., 2020\ [1]_ into version 2. The phenopackets +represent 384 individuals described in published case reports with Human Phenotype Ontology terms, +causal genetic variants, and OMIM disease identifiers. + +Let's start by downloading and unpacking the phenopacket dataset. +The phenopacket dataset is available for download from Zenodo\ [2]_. 
Then, we extract the archive content into +a folder named ``v1``:: + + curl -o phenopackets.v1.zip https://zenodo.org/record/3905420/files/phenopackets.zip + unzip -d v1 phenopackets.v1.zip + +Due to differences between version 1 and 2, there are two ways to convert *v1* phenopackets into *v2*. +Briefly, the conversion either assumes that the `Variant`\ s are *causal* with respect to a `Disease` of the +v1 phenopacket, or skips conversion of `Variant`\ s altogether. The logic is controlled with the ``--convert-variants`` +CLI option and the conversion can be done if and only if the *v1* phenopacket has one `Disease`. +See the :ref:`rstconverting` section for more information. + +Let's convert all *v1* phenopackets and store the results in JSON format in a new folder ``v2``:: + + # Make the folder for converted phenopackets. + mkdir -p v2 + + # Convert the phenopackets. + for pp in $(find v1 -name "*.json"); do + pp_name=$(basename ${pp}) + pxf convert --convert-variants -i ${pp} > v2/${pp_name} + done + + printf "Converted %s phenopackets\n" $(ls v2/ | wc -l) + +We converted 384 phenopackets into *v2* format and stored the JSON files in the ``v2`` folder. + +Validate +======== + +The `validate` command of *phenopacket-tools* validates correctness of phenopackets, families and cohorts. +This section focuses on the *off-the-shelf* phenopacket validators. +See the :ref:`rstvalidation` and the `Java Documentation`_ to learn how to implement a custom validator. + +We will work with a suite of phenopackets that are bundled in the *phenopacket-tools* distribution ZIP file. +The phenopackets are located in the `examples` folder next to the executable JAR file: + +.. parsed-literal:: + examples=$(pwd)/phenopacket-tools-cli-\ |release|\ /examples + +We will describe each validation and show example validation errors and a proposed solution in a table. + +..
note:: + The validation examples use `Phenopacket`\ s, but the validation functionality is available for all top-level Phenopacket Schema + elements, including `Cohort` and `Family`. +.. note:: + The validation is implemented for *v2* phenopackets only. The *v1* phenopackets must be converted to *v2* prior to + running validation. + + +Base validation +^^^^^^^^^^^^^^^ + +First, let's check if the phenopackets meet the base requirements, as described by the Phenopacket Schema. +All phenopackets, regardless of their aim or scope, must pass this requirement to be valid. + +All required fields must be present +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `BaseValidator` checks that all required fields are not empty:: + + pxf validate -i ${examples}/base/missing-fields.json + +The validator emits 3 lines with the following issues: + +.. csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + 'id' is missing but it is required, Add the phenopacket ID + 'subject.id' is missing but it is required, Add the subject ID + 'phenotypicFeatures[0].type.label' is missing but it is required, Add the `label` attribute into the `type` of the first phenotypic feature + + +All ontologies are defined +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Phenopacket Schema relies heavily on use of ontologies and ontology concepts. `MetaData` element lists +the ontologies used in the particular phenopacket. + +The `MetaDataValidator` checks if the `MetaData` has an ontology `Resource` for all concepts used in the phenopacket:: + + pxf validate -i ${examples}/base/missing-resources.json + +The validator points out the absence of `NCBITaxon` definition: + +..
csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + No ontology corresponding to ID 'NCBITaxon:9606' found in MetaData, Add a `Resource` element with `NCBITaxon` definition into `MetaData` + + +Custom validation rules +^^^^^^^^^^^^^^^^^^^^^^^ + +Projects or consortia can enforce specific requirements by designing a custom JSON schema. +For instance, a rare disease project may require presence of several elements that are not required by the default schema: + +1. Subject (proband being investigated) +2. At least one `PhenotypicFeature` element and using HPO terms for phenotypic features +3. Time at last encounter (sub-element of subject), representing the age of the proband + +*Phenopacket-tools* ships with a JSON schema for enforcing the above requirements. +The schema is located at ``examples/custom-json-schema/hpo-rare-disease-schema.json``. + +Using the custom JSON schema via ``--require`` option will point out issues in the 4 example phenopackets:: + + pxf validate --require ${examples}/custom-json-schema/hpo-rare-disease-schema.json \ + -i ${examples}/custom-json-schema/marfan.no-subject.invalid.json \ + -i ${examples}/custom-json-schema/marfan.no-phenotype.invalid.json \ + -i ${examples}/custom-json-schema/marfan.not-hpo.invalid.json \ + -i ${examples}/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json + +.. 
csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + 'subject' is missing but it is required, Add the `Subject` element + 'phenotypicFeatures' is missing but it is required, Add at least one `PhenotypicFeature` + 'phenotypicFeatures[0].type.id' does not match the regex pattern ``^HP:\d{7}$``, Use Human Phenotype Ontology in `PhenotypicFeature`\ s + 'subject.timeAtLastEncounter' is missing but it is required, Add the time at last encounter field + + +Phenotype validation +^^^^^^^^^^^^^^^^^^^^ + +*Phenopacket-tools* offers a validator for checking logical consistency of phenotypic features in the phenopacket. +The phenotype validation requires the Human Phenotype Ontology (HPO) file to work. + +.. note:: + The examples below assume that the latest HPO in JSON format has been downloaded to ``hp.json``. + The HPO file can be downloaded from `HPO releases`_. + + +Phenopackets use non-obsolete term IDs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `HpoPhenotypeValidator` checks if the phenopacket contains obsolete HPO terms:: + + pxf validate --hpo hp.json -i ${examples}/phenotype-validation/marfan.obsolete-term.invalid.json + +It turns out that ``marfan.obsolete-term.invalid.json`` uses an obsolete ``HP:0002631`` instead of +the primary ``HP:0002616`` for *Aortic root aneurysm*: + +.. csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + Using obsolete id (HP:0002631) instead of current primary id (HP:0002616) in id-C, Replace the obsolete ID with the primary ID + + +The annotation-propagation rule is not violated +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Due to annotation propagation rule, it is a logical error to use both a term and its ancestor +(e.g. *Arachnodactyly* and *Abnormality of finger*). +When choosing HPO terms for phenotypic features, the *most* specific terms should be used for the *observed* clinical features. 
+In contrast, the *least* specific terms should be used for the *excluded* clinical features. + +The `HpoAncestryValidator` checks that the annotation propagation rule is not violated:: + + pxf validate --hpo hp.json -i ${examples}/phenotype-validation/marfan.annotation-propagation-rule.invalid.json + +.. csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + "Phenotypic features of id-C must not contain both an observed term (Aortic root aneurysm, HP:0002616) and an observed ancestor (Aortic aneurysm, HP:0004942)", Remove the less specific term + +.. note:: + Presence of an excluded descendant and an observed ancestor does not violate the annotation propagation rule. + A phenopacket with excluded *Aortic root aneurysm* and present *Aortic aneurysm* is valid, + see ``marfan.valid.json``. + +.. TODO - Organ system validation + +.. [1] https://pubmed.ncbi.nlm.nih.gov/32755546 +.. [2] https://zenodo.org/record/3905420 +.. _Java Documentation: https://javadoc.io/doc/org.phenopackets.phenopackettools/phenopacket-tools-validator-core/latest/org.phenopackets.phenopackettools.validator.core/module-summary.html +.. _HPO releases: https://hpo.jax.org/app/data/ontology \ No newline at end of file diff --git a/docs/validation.rst b/docs/validation.rst index 1c87acb6..1f174118 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -1,13 +1,10 @@ -.. _rstvalidating: +.. _rstvalidation: ======================= Validating Phenopackets ======================= - - - Protobuf ^^^^^^^^ Phenopackets schema uses protobuf, an exchange format developed @@ -25,16 +22,16 @@ other formats. Validation ^^^^^^^^^^ -The phenopacket-tools library offers JSON-Schema-based and semantic validations. The syntactic validation +The *phenopacket-tools* library offers JSON-Schema-based and semantic validations. The syntactic validation is done using JSON schema. Additionally, an interface is provided to perform arbitrary kinds of validation.
-This validation should be performed for all phenophenopackets. +This validation should be performed for all phenopackets. Additional constraints and requirements may be made for phenopackets that are used in a specific project or for a specific collaboration or consortium. For instance, a rare-disease consortium may require that all phenotypic features be recorded using valid HPO terms. An example class is provided that checks all ``PhenotypicFeature`` elements, ensures that they use HPO terms with valid (i.e., primary) id's, and checks whether both a term and an ancestor of the term are used - if so -a wanring is emitted, because an annotation with a specific HPO term +a warning is emitted, because an annotation with a specific HPO term (e.g., `Perimembranous ventricular septal defect `_) implies all of the ancestors of the term (e.g., a patient with perimembranous VSD by necessity also has `Ventricular septal defect `_). @@ -64,6 +61,6 @@ In code, this can be implemented as follows. System.out.println("Error opening the phenopacket: " + e); } - +.. 
TODO - continue diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index fc2f04d6..7d345f6f 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -63,37 +63,42 @@ - - - - src/main/resources - true - - - - - org.springframework.boot - spring-boot-maven-plugin - - - org.apache.maven.plugins - maven-assembly-plugin - - - src/assemble/distribution.xml - - - - - make-assembly - package - - single - - - - - - + + + release + + + + src/main/resources + true + + + + + org.springframework.boot + spring-boot-maven-plugin + + + org.apache.maven.plugins + maven-assembly-plugin + + + src/assemble/distribution.xml + + + + + make-assembly + package + + single + + + + + + + + diff --git a/phenopacket-tools-cli/src/assemble/distribution.xml b/phenopacket-tools-cli/src/assemble/distribution.xml index 47440080..c4d093d9 100644 --- a/phenopacket-tools-cli/src/assemble/distribution.xml +++ b/phenopacket-tools-cli/src/assemble/distribution.xml @@ -30,8 +30,11 @@ ${project.basedir}/src/examples ./examples - phenopackets/* - schemas/* + base/* + convert/* + custom-json-schema/* + organ-systems/* + phenotype-validation/* diff --git a/phenopacket-tools-cli/src/examples/convert/Schreckenbach-2014-TPM3-II.2.json b/phenopacket-tools-cli/src/examples/convert/Schreckenbach-2014-TPM3-II.2.json new file mode 100644 index 00000000..96a13fbe --- /dev/null +++ b/phenopacket-tools-cli/src/examples/convert/Schreckenbach-2014-TPM3-II.2.json @@ -0,0 +1,396 @@ +{ + "id": "PMID:24239060-Schreckenbach-2014-TPM3-II.2", + "subject": { + "id": "II.2", + "ageAtCollection": { + "age": "P45Y" + }, + "sex": "FEMALE", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "Homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002527", + "label": "Falls" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": 
"Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0001260", + "label": "Dysarthria" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003391", + "label": "Gowers sign" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0009046", + "label": "Difficulty running" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0000347", + "label": "Micrognathia" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003691", + "label": "Scapular winging" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002913", + "label": "Myoglobinuria" + }, + "negated": true, + "evidence": [{ + 
"evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0001265", + "label": "Hyporeflexia" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0000275", + "label": "Narrow face" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002650", + "label": "Scoliosis" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0000651", + "label": "Diplopia" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002515", + "label": "Waddling gait" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the 
literature" + } + }] + }, { + "type": { + "id": "HP:0002495", + "label": "Impaired vibratory sensation" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0010830", + "label": "Impaired tactile sensation" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003202", + "label": "Skeletal muscle atrophy" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0002705", + "label": "High, narrow palate" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003326", + "label": "Myalgia" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0040129", + "label": "Abnormal nerve conduction velocity" + }, + "negated": true, + "evidence": [{ + 
"evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0003701", + "label": "Proximal muscle weakness" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }, { + "type": { + "id": "HP:0000508", + "label": "Ptosis" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + } + }] + }], + "genes": [{ + "id": "NCBIGene:7170", + "symbol": "TPM3" + }], + "variants": [{ + "vcfAllele": { + "genomeAssembly": "GRCh37", + "chr": "1", + "pos": 154145610, + "ref": "G", + "alt": "T" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:609284", + "label": "NEMALINE MYOPATHY 1; NEM1CAP MYOPATHY 1, INCLUDED; CAPM1, INCLUDED" + } + }], + "metaData": { + "createdBy": "Hpo Case Annotator : 1.0.13-SNAPSHOT", + "submittedBy": "HPO:probinson", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "pato", + "name": "Phenotype And Trait Ontology", + "url": "http://purl.obolibrary.org/obo/pato.owl", + "version": "2018-03-28", + "namespacePrefix": "PATO", + "iriPrefix": "http://purl.obolibrary.org/obo/PATO_" + }, { + "id": "geno", + "name": "Genotype 
Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2018-03-02", + "namespacePrefix": "NCBITaxon" + }, { + "id": "eco", + "name": "Evidence and Conclusion Ontology", + "url": "http://purl.obolibrary.org/obo/eco.owl", + "version": "2018-11-10", + "namespacePrefix": "ECO", + "iriPrefix": "http://purl.obolibrary.org/obo/ECO_" + }, { + "id": "omim", + "name": "Online Mendelian Inheritance in Man", + "url": "https://www.omim.org", + "namespacePrefix": "OMIM" + }], + "phenopacketSchemaVersion": "1.0.0-RC3", + "externalReferences": [{ + "id": "PMID:24239060", + "description": "Novel TPM3 mutation in a family with cap myopathy and review of the literature" + }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-subject.invalid.json b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-subject.invalid.json new file mode 100644 index 00000000..56ef72b1 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-subject.invalid.json @@ -0,0 +1,22 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/organ-systems/marfan.valid.json b/phenopacket-tools-cli/src/examples/organ-systems/marfan.valid.json new file mode 
100644 index 00000000..fc712eae --- /dev/null +++ b/phenopacket-tools-cli/src/examples/organ-systems/marfan.valid.json @@ -0,0 +1,28 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0004942", + "label": "Aortic aneurysm" + } + }, { + "type": { + "id": "HP:0002616", + "label": "Aortic root aneurysm" + }, + "excluded": true + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java index 053c5a61..fce2e5b0 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfoDefault.java @@ -3,7 +3,7 @@ record ValidatorInfoDefault(String validatorId, String validatorName, String description) implements ValidatorInfo { - static final ValidatorInfoDefault BASE = new ValidatorInfoDefault("Base", "Base syntax validator", "The base syntax validation of a phenopacket, family, or cohort"); - static final ValidatorInfoDefault INPUT_VALIDATOR = new ValidatorInfoDefault("Input", "Data format validator", "The validator for checking data format issues (e.g. 
presence of a required field in JSON document)"); + static final ValidatorInfoDefault BASE = new ValidatorInfoDefault("BaseValidator", "Base syntax validator", "The base syntax validation of a phenopacket, family, or cohort"); + static final ValidatorInfoDefault INPUT_VALIDATOR = new ValidatorInfoDefault("InputValidator", "Data format validator", "The validator for checking data format issues (e.g. presence of a required field in JSON document)"); } From 2c7522fc873b8dd84692f50436d61eefc8a42b56 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 11 Nov 2022 11:48:00 -0500 Subject: [PATCH 74/79] Extend documentation, rearrange the examples. Signed-off-by: Daniel Danis --- docs/tutorial.rst | 69 ++++++++--- docs/tutorial_examples.rst | 114 ++++++++++++++++++ docs/validation.rst | 89 ++++++++++++++ .../src/assemble/distribution.xml | 6 +- .../examples/{ => validate}/base/README.md | 0 .../base/missing-fields-valid.json | 0 .../{ => validate}/base/missing-fields.json | 0 .../base/missing-resources-valid.json | 0 .../base/missing-resources.json | 0 .../hpo-rare-disease-schema.json | 0 .../marfan.no-phenotype.invalid.json | 0 .../marfan.no-subject.invalid.json | 0 ...fan.no-time-at-last-encounter.invalid.json | 0 .../marfan.not-hpo.invalid.json | 0 .../custom-json-schema/marfan.valid.json | 0 ...fan.all-organ-system-annotated.valid.json} | 18 ++- ...marfan.missing-eye-annotation.invalid.json | 32 +++++ .../marfan.no-abnormalities.valid.json | 40 ++++++ ...n.annotation-propagation-rule.invalid.json | 0 .../marfan.obsolete-term.invalid.json | 0 .../phenotype-validation}/marfan.valid.json | 0 .../orgsys/AbstractOrganSystemValidator.java | 11 +- .../phenotype/OrganSystemValidatorTest.java | 2 +- 23 files changed, 349 insertions(+), 32 deletions(-) create mode 100644 docs/tutorial_examples.rst rename phenopacket-tools-cli/src/examples/{ => validate}/base/README.md (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/base/missing-fields-valid.json (100%) rename 
phenopacket-tools-cli/src/examples/{ => validate}/base/missing-fields.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/base/missing-resources-valid.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/base/missing-resources.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/custom-json-schema/hpo-rare-disease-schema.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/custom-json-schema/marfan.no-phenotype.invalid.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/custom-json-schema/marfan.no-subject.invalid.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/custom-json-schema/marfan.not-hpo.invalid.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/custom-json-schema/marfan.valid.json (100%) rename phenopacket-tools-cli/src/examples/{phenotype-validation/marfan.valid.json => validate/organ-systems/marfan.all-organ-system-annotated.valid.json} (62%) create mode 100644 phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.missing-eye-annotation.invalid.json create mode 100644 phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.no-abnormalities.valid.json rename phenopacket-tools-cli/src/examples/{ => validate}/phenotype-validation/marfan.annotation-propagation-rule.invalid.json (100%) rename phenopacket-tools-cli/src/examples/{ => validate}/phenotype-validation/marfan.obsolete-term.invalid.json (100%) rename phenopacket-tools-cli/src/examples/{organ-systems => validate/phenotype-validation}/marfan.valid.json (100%) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index f97cc39d..4c7fbcae 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -50,16 +50,13 @@ In general, Java command line applications are invoked as ``java -jar executable too verbose and we can shorten the command by 
defining an alias. Let's define an alias for *phenopacket-tools*. Assuming the distribution ZIP was unpacked into -phenopacket-tools-cli-|release| directory, run the following to set up the alias: +phenopacket-tools-cli-|release| directory, run the following to set up the alias and to check that the alias works: .. parsed-literal:: alias pxf="java -jar $(pwd)/phenopacket-tools-cli-\ |release|\ /phenopacket-tools-cli-|release|.jar" - -Now, let's check that the alias works by printing the help message: - -.. parsed-literal:: pxf --help + Convert ======= @@ -113,6 +110,9 @@ The phenopackets are located in `examples` folder next to the executable JAR fil .. parsed-literal:: examples=$(pwd)/phenopacket-tools-cli-\ |release|\ /examples +.. note:: + See :ref:`rsttutorialexamples` for detailed info of the example phenopackets. + We will describe each validation and show an example validation errors and a proposed solution in a table. .. note:: @@ -129,12 +129,14 @@ Base validation First, let's check if the phenopackets meet the base requirements, as described by the Phenopacket Schema. All phenopackets, regardless of their aim or scope must pass this requirement to be valid. +See :ref:`rstbasevalidation` for more details. + All required fields must be present ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `BaseValidator` checks that all required fields are not empty:: - pxf validate -i ${examples}/base/missing-fields.json + pxf validate -i ${examples}/validate/base/missing-fields.json The validator emits 3 lines with the following issues: @@ -155,7 +157,7 @@ the ontologies used in the particular phenopacket. 
The `MetaDataValidator` checks if the `MetaData` has an ontology `Resource` for all concepts used in the phenopacket:: - pxf validate -i ${examples}/base/missing-resources.json + pxf validate -i ${examples}/validate/base/missing-resources.json The validator points out the absence of `NCBITaxon` definition: @@ -181,11 +183,11 @@ The schema is located at ``examples/custom-json-schema/hpo-rare-disease-schema.j Using the custom JSON schema via ``--require`` option will point out issues in the 4 example phenopackets:: - pxf validate --require ${examples}/custom-json-schema/hpo-rare-disease-schema.json \ - -i ${examples}/custom-json-schema/marfan.no-subject.invalid.json \ - -i ${examples}/custom-json-schema/marfan.no-phenotype.invalid.json \ - -i ${examples}/custom-json-schema/marfan.not-hpo.invalid.json \ - -i ${examples}/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json + pxf validate --require ${examples}/validate/custom-json-schema/hpo-rare-disease-schema.json \ + -i ${examples}/validate/custom-json-schema/marfan.no-subject.invalid.json \ + -i ${examples}/validate/custom-json-schema/marfan.no-phenotype.invalid.json \ + -i ${examples}/validate/custom-json-schema/marfan.not-hpo.invalid.json \ + -i ${examples}/validate/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json .. csv-table:: :header: "Validation error", "Solution" @@ -196,6 +198,10 @@ Using the custom JSON schema via ``--require`` option will point out issues in t 'phenotypicFeatures[0].type.id' does not match the regex pattern ``^HP:\d{7}$``, Use Human Phenotype Ontology in `PhenotypicFeature`\ s 'subject.timeAtLastEncounter' is missing but it is required, Add the time at last encounter field +See :ref:`rstcustomvalidation` for more details. + + +.. 
_rstphenotypevalidationtutorial: Phenotype validation ^^^^^^^^^^^^^^^^^^^^ @@ -207,13 +213,15 @@ The phenotype validation requires the Human Phenotype Ontology (HPO) file to wor The examples below assume that the latest HPO in JSON format has been downloaded to ``hp.json``. The HPO file can be downloaded from `HPO releases`_. +See :ref:`rstphenotypevalidation` for more details. + Phenopackets use non-obsolete term IDs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `HpoPhenotypeValidator` checks if the phenopacket contains obsolete HPO terms:: - pxf validate --hpo hp.json -i ${examples}/phenotype-validation/marfan.obsolete-term.invalid.json + pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.obsolete-term.invalid.json It turns out that ``marfan.obsolete-term.invalid.json`` uses an obsolete ``HP:0002631`` instead of the primary ``HP:0002616`` for *Aortic root aneurysm*: @@ -235,7 +243,7 @@ In contrary, the *least* specific terms should be used for the *excluded* clinic The `HpoAncestryValidator` checks that the annotation propagation rule is not violated:: - pxf validate --hpo hp.json -i ${examples}/phenotype-validation/marfan.annotation-propagation-rule.invalid.json + pxf validate --hpo hp.json -i ${examples}/validate/phenotype-validation/marfan.annotation-propagation-rule.invalid.json .. csv-table:: :header: "Validation error", "Solution" @@ -248,7 +256,38 @@ The `HpoAncestryValidator` checks that the annotation propagation rule is not vi A phenopacket with excluded *Aortic root aneurysm* and present *Aortic aneurysm* is valid, see ``marfan.valid.json``. -.. TODO - Organ system validation + +Annotation of organ systems +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We can validate presence of annotation for specific organ systems in a phenopacket. 
+ +Using the term IDs of the top-level HPO terms, we can validate annotation of +`Eye `_, +`Cardiovascular `_, and +`Respiratory `_ organ systems +in 3 phenopackets of toy `Marfan syndrome `_ patients:: + + pxf validate --hpo hp.json \ + --organ-system HP:0000478 --organ-system HP:0001626 --organ-system HP:0002086 \ + -i ${examples}/validate/organ-systems/marfan.all-organ-system-annotated.valid.json \ + -i ${examples}/validate/organ-systems/marfan.missing-eye-annotation.invalid.json \ + -i ${examples}/validate/organ-systems/marfan.no-abnormalities.valid.json + +.. note:: + Organ system validation requires HPO ontology. See the :ref:`rstphenotypevalidationtutorial` for more details about getting + the HPO file. + +The `HpoOrganSystemValidator` will point out one error in the `marfan.missing-eye-annotation.invalid.json` phenopacket: + +.. csv-table:: + :header: "Validation error", "Solution" + :widths: 350, 550 + + Missing annotation for Abnormality of the eye [HP:0000478] in id-C, Annotate the eye or exclude any abnormality. + +See :ref:`rstorgsysvalidation` for more details. + .. [1] https://pubmed.ncbi.nlm.nih.gov/32755546 .. [2] https://zenodo.org/record/3905420 diff --git a/docs/tutorial_examples.rst b/docs/tutorial_examples.rst new file mode 100644 index 00000000..17f600b5 --- /dev/null +++ b/docs/tutorial_examples.rst @@ -0,0 +1,114 @@ +.. _rsttutorialexamples: + +==================== +Example phenopackets +==================== + +A set of example phenopackets is distributed with the *phenopacket-tools* binary. The example files should be used +to demonstrate the tools' functionality. 
+ +The files are grouped in sub-folders by the target command:: + + examples + ├── convert + └── validate + ├── base + ├── custom-json-schema + ├── organ-systems + └── phenotype-validation + + +Convert +^^^^^^^ + +The ``convert`` folder contains one v1 phenopacket to demonstrate the conversion functionality:: + + Schreckenbach-2014-TPM3-II.2.json + +The phenopacket describes a case report of a 45-year-old female diagnosed with +`NEMALINE MYOPATHY 1; NEM1 `_ +caused by a heterozygous mutation in `TPM3 `_. + + +Validate +^^^^^^^^ + +The ``validate`` directory contains files for demonstrating *off-the-shelf* phenopacket validation functionalities. + + +``base`` - base validation functionality +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The folder contains a few phenopackets for demonstrating the base validation functionality of *phenopacket-tools*; +the validation that **any phenopacket must pass**. + +.. csv-table:: + :header: "File name", "Description" + + missing-fields.json, "An invalid phenopacket with missing `id`, `subject.id` and `phenotypicFeatures[0].type.label` attributes." + missing-fields-valid.json, A valid version of the above phenopacket with IDs and the label. + missing-resources.json, An invalid phenopacket with missing `Resource` for the `NCBITaxon:9606` ontology concept used to represent organism of the subject. + missing-resources-valid.json, A valid version of the above phenopacket with the `Resource` for describing `NCBITaxon`. + + +``custom-json-schema`` - validate custom requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A bunch of phenopackets for showing how a custom JSON schema can be used to validate user-specific requirements. + +.. csv-table:: + :header: "File name", "Description" + + hpo-rare-disease-schema.json, A custom JSON schema for enforcing user-specific requirements. + marfan.no-phenotype.invalid.json, The phenopacket is *invalid* since it contains no phenotypic features. 
+ marfan.no-subject.invalid.json, The phenopacket is *invalid* since the `subject` is missing. + marfan.no-time-at-last-encounter.invalid.json, The phenopacket is *invalid* due to missing time at last encounter. + marfan.not-hpo.invalid.json, The phenopacket is *invalid* because HPO terms are not used to represent phenotypic features. + marfan.valid.json, A phenopacket that meets the custom requirements. + + +``organ-systems`` - validate annotation of organ systems +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Phenopackets for showing organ system validation. As an example, we work with phenopackets of patients with +`Marfan syndrome `_ and we require annotation +of +`Eye `_, +`Cardiovascular `_, and +`Respiratory `_ organ systems \ +either by *excluding* the corresponding top-level HPO term or by adding a descendent term. + +The phenopackets include + +.. list-table:: + :header-rows: 1 + + * - File name + - Description + * - marfan.no-abnormalities.valid.json + - A valid phenopacket of a proband with no abnormalities of the target organ systems. + * - marfan.all-organ-system-annotated.valid.json + - A valid phenopacket of a proband who had an abnormality of eye and cardiovascular systems but + no abnormality of respiratory system. Note that it is OK to have phenotypic feature of other organ system, + such as Arachnodactyly in this case. + * - marfan.missing-eye-annotation.invalid.json + - An invalid phenopacket of a proband without any annotation of the eye. + +``phenotype-validation`` - validate phenotypic features +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Phenopackets for demonstrating ontology-based validation. + +.. list-table:: + :header-rows: 1 + + * - File name + - Description + * - marfan.annotation-propagation-rule.invalid.json + - | Invalid phenopacket due to logical inconsistency in phenotypic features. The phenopacket contains + | both *Aortic root aneurysm* and its ancestor *Aortic aneurysm*. 
Only the more specific term should be used. + * - marfan.obsolete-term.invalid.json + - The phenopacket is *invalid* because it contains an obsolete HPO term. + * - marfan.valid.json + - A phenopacket that meets the phenotype validation requirements. + diff --git a/docs/validation.rst b/docs/validation.rst index 1f174118..40fcf2ee 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -36,6 +36,15 @@ a warning is emitted, because an annotation with a specific HPO term implies all of the ancestors of the term (e.g., a patient with perimembranous VSD by necessity also has `Ventricular septal defect `_). +API +~~~ + +See the ``TODO - add JavaDoc link`` for the API documentation. + +.. TODO - refer to org.phenopackets.phenopackettools.validator.jsonschema module +.. Describe validation workflow in general + +.. _rstbasevalidation: Base validation ^^^^^^^^^^^^^^^ @@ -61,6 +70,86 @@ In code, this can be implemented as follows. System.out.println("Error opening the phenopacket: " + e); } +API +~~~ + +See the ``TODO - add JavaDoc link`` for the API documentation. + +.. TODO - refer to ... and to org/phenopackets/phenopackettools/validator/core/metadata + +.. _rstphenotypevalidation: + +Phenotype validation +^^^^^^^^^^^^^^^^^^^^ + +TODO - write .. TODO - continue +API +~~~ + +See the ``TODO - add JavaDoc link`` for the API documentation. + +.. TODO - refer to org/phenopackets/phenopackettools/validator/core/phenotype + +.. _rstcustomvalidation: + +Custom validation +^^^^^^^^^^^^^^^^^ + +TODO - write +.. TODO - continue + + +API +~~~ + +See the ``TODO - add JavaDoc link`` for the API documentation. + +.. TODO - refer to TODO - somewhere in JSON-schema validation + + +.. _rstorgsysvalidation: + +Organ system validation +^^^^^^^^^^^^^^^^^^^^^^^ + +TODO - write +.. TODO - continue + +We can validate presence of annotation for specific organ systems in a phenopacket. + +As an example, we work with toy phenopackets that represent patients with +`Marfan syndrome `_. 
Due to the nature of the Marfan syndrome, +we may require annotation of three organ systems: + +* Eye +* Cardiovascular system +* Respiratory system + +The annotation is done either by *excluding* the corresponding top-level HPO term or by adding a descendent term: + +.. list-table:: + :header-rows: 1 + + * - Organ system + - Top-level HPO term + - Example descendent + * - Eye + - `Abnormality of the eye `_ + - `Ectopia lentis `_ + * - Cardiovascular system + - `Abnormality of the cardiovascular system `_ + - `Mitral regurgitation `_ + * - Respiratory system + - `Abnormality of the respiratory system `_ + - `Pneumothorax `_ + +API +~~~ + +See the ``TODO - add JavaDoc link`` for the API documentation. + +.. TODO - refer to org/phenopackets/phenopackettools/validator/core/phenotype/orgsys + diff --git a/phenopacket-tools-cli/src/assemble/distribution.xml b/phenopacket-tools-cli/src/assemble/distribution.xml index c4d093d9..54f95946 100644 --- a/phenopacket-tools-cli/src/assemble/distribution.xml +++ b/phenopacket-tools-cli/src/assemble/distribution.xml @@ -30,11 +30,7 @@ ${project.basedir}/src/examples ./examples - base/* - convert/* - custom-json-schema/* - organ-systems/* - phenotype-validation/* + **/** diff --git a/phenopacket-tools-cli/src/examples/base/README.md b/phenopacket-tools-cli/src/examples/validate/base/README.md similarity index 100% rename from phenopacket-tools-cli/src/examples/base/README.md rename to phenopacket-tools-cli/src/examples/validate/base/README.md diff --git a/phenopacket-tools-cli/src/examples/base/missing-fields-valid.json b/phenopacket-tools-cli/src/examples/validate/base/missing-fields-valid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/base/missing-fields-valid.json rename to phenopacket-tools-cli/src/examples/validate/base/missing-fields-valid.json diff --git a/phenopacket-tools-cli/src/examples/base/missing-fields.json b/phenopacket-tools-cli/src/examples/validate/base/missing-fields.json similarity 
index 100% rename from phenopacket-tools-cli/src/examples/base/missing-fields.json rename to phenopacket-tools-cli/src/examples/validate/base/missing-fields.json diff --git a/phenopacket-tools-cli/src/examples/base/missing-resources-valid.json b/phenopacket-tools-cli/src/examples/validate/base/missing-resources-valid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/base/missing-resources-valid.json rename to phenopacket-tools-cli/src/examples/validate/base/missing-resources-valid.json diff --git a/phenopacket-tools-cli/src/examples/base/missing-resources.json b/phenopacket-tools-cli/src/examples/validate/base/missing-resources.json similarity index 100% rename from phenopacket-tools-cli/src/examples/base/missing-resources.json rename to phenopacket-tools-cli/src/examples/validate/base/missing-resources.json diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/hpo-rare-disease-schema.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/hpo-rare-disease-schema.json similarity index 100% rename from phenopacket-tools-cli/src/examples/custom-json-schema/hpo-rare-disease-schema.json rename to phenopacket-tools-cli/src/examples/validate/custom-json-schema/hpo-rare-disease-schema.json diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-phenotype.invalid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-phenotype.invalid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-phenotype.invalid.json rename to phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-phenotype.invalid.json diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-subject.invalid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-subject.invalid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-subject.invalid.json rename 
to phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-subject.invalid.json diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json rename to phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.no-time-at-last-encounter.invalid.json diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.not-hpo.invalid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.not-hpo.invalid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/custom-json-schema/marfan.not-hpo.invalid.json rename to phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.not-hpo.invalid.json diff --git a/phenopacket-tools-cli/src/examples/custom-json-schema/marfan.valid.json b/phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.valid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/custom-json-schema/marfan.valid.json rename to phenopacket-tools-cli/src/examples/validate/custom-json-schema/marfan.valid.json diff --git a/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.valid.json b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.all-organ-system-annotated.valid.json similarity index 62% rename from phenopacket-tools-cli/src/examples/phenotype-validation/marfan.valid.json rename to phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.all-organ-system-annotated.valid.json index fc712eae..c71e4e88 100644 --- a/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.valid.json +++ b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.all-organ-system-annotated.valid.json @@ -2,15 +2,25 @@ 
"id": "id-C", "phenotypicFeatures": [{ "type": { - "id": "HP:0004942", - "label": "Aortic aneurysm" + "id": "HP:0001083", + "label": "Ectopia lentis" } }, { "type": { - "id": "HP:0002616", - "label": "Aortic root aneurysm" + "id": "HP:0001653", + "label": "Mitral regurgitation" + } + }, { + "type": { + "id": "HP:0002086", + "label": "Abnormality of the respiratory system" }, "excluded": true + }, { + "type": { + "id": "HP:0001166", + "label": "Arachnodactyly" + } }], "metaData": { "created": "2021-05-14T10:35:00Z", diff --git a/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.missing-eye-annotation.invalid.json b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.missing-eye-annotation.invalid.json new file mode 100644 index 00000000..35687e3c --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.missing-eye-annotation.invalid.json @@ -0,0 +1,32 @@ +{ + "id": "id-C", + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001653", + "label": "Mitral regurgitation" + } + }, { + "type": { + "id": "HP:0002107", + "label": "Pneumothorax" + } + }, { + "type": { + "id": "HP:0001166", + "label": "Arachnodactyly" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.no-abnormalities.valid.json b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.no-abnormalities.valid.json new file mode 100644 index 00000000..a6280f56 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/validate/organ-systems/marfan.no-abnormalities.valid.json @@ -0,0 +1,40 @@ +{ + "id": "id-C", + 
"phenotypicFeatures": [{ + "type": { + "id": "HP:0000478", + "label": "Abnormality of the eye" + }, + "excluded": true + }, { + "type": { + "id": "HP:0001626", + "label": "Abnormality of the cardiovascular system" + }, + "excluded": true + }, { + "type": { + "id": "HP:0002086", + "label": "Abnormality of the respiratory system" + }, + "excluded": true + }, { + "type": { + "id": "HP:0001166", + "label": "Arachnodactyly" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2021-08-02", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.annotation-propagation-rule.invalid.json b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.annotation-propagation-rule.invalid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/phenotype-validation/marfan.annotation-propagation-rule.invalid.json rename to phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.annotation-propagation-rule.invalid.json diff --git a/phenopacket-tools-cli/src/examples/phenotype-validation/marfan.obsolete-term.invalid.json b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.obsolete-term.invalid.json similarity index 100% rename from phenopacket-tools-cli/src/examples/phenotype-validation/marfan.obsolete-term.invalid.json rename to phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.obsolete-term.invalid.json diff --git a/phenopacket-tools-cli/src/examples/organ-systems/marfan.valid.json b/phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.valid.json similarity index 100% rename from 
phenopacket-tools-cli/src/examples/organ-systems/marfan.valid.json rename to phenopacket-tools-cli/src/examples/validate/phenotype-validation/marfan.valid.json diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java index 257999e4..2c1fff7c 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/orgsys/AbstractOrganSystemValidator.java @@ -1,7 +1,6 @@ package org.phenopackets.phenopackettools.validator.core.phenotype.orgsys; import com.google.protobuf.MessageOrBuilder; -import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenol.ontology.data.Term; @@ -12,13 +11,11 @@ import org.phenopackets.phenopackettools.validator.core.phenotype.util.PhenotypicFeaturesByExclusionStatus; import org.phenopackets.phenopackettools.validator.core.phenotype.util.Util; import org.phenopackets.schema.v2.PhenopacketOrBuilder; -import org.phenopackets.schema.v2.core.OntologyClass; import org.phenopackets.schema.v2.core.PhenotypicFeature; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; -import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Stream; @@ -73,13 +70,13 @@ public ValidatorInfo validatorInfo() { @Override public List validate(T component) { return getPhenopackets(component) - .flatMap(p -> checkPhenotypicFeatures(p.getSubject().getId(), p.getPhenotypicFeaturesList())) + .flatMap(p 
-> checkPhenotypicFeatures(p, p.getPhenotypicFeaturesList())) .toList(); } protected abstract Stream getPhenopackets(T component); - private Stream checkPhenotypicFeatures(String individualId, List features) { + private Stream checkPhenotypicFeatures(PhenopacketOrBuilder phenopacket, List features) { PhenotypicFeaturesByExclusionStatus featuresByExclusion = Util.partitionByExclusionStatus(features); Stream.Builder results = Stream.builder(); @@ -102,8 +99,8 @@ private Stream checkPhenotypicFeatures(String individualId, Li Term organSystem = hpo.getTermMap().get(organSystemId); ValidationResult result = ValidationResult.error(VALIDATOR_INFO, MISSING_ORGAN_SYSTEM_CATEGORY, - "Missing annotation for %s [%s] in '%s'" - .formatted(organSystem.getName(), organSystem.id().getValue(), individualId)); + "Missing annotation for %s [%s]%s" + .formatted(organSystem.getName(), organSystem.id().getValue(), summarizePhenopacketAndIndividualId(phenopacket))); results.add(result); } diff --git a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java index 5c3a3a2e..845fdc51 100644 --- a/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java +++ b/phenopacket-tools-validator-core/src/test/java/org/phenopackets/phenopackettools/validator/core/phenotype/OrganSystemValidatorTest.java @@ -82,7 +82,7 @@ public void annotationAbsenceLeadsToAnError(boolean excluded) { assertThat(result.validatorInfo(), equalTo(slenderFingerValidator.validatorInfo())); assertThat(result.level(), equalTo(ValidationLevel.ERROR)); assertThat(result.category(), equalTo("Missing organ system annotation")); - assertThat(result.message(), equalTo("Missing annotation for Slender finger [HP:0001238] in 
'example-subject'")); + assertThat(result.message(), equalTo("Missing annotation for Slender finger [HP:0001238] in example-phenopacket/example-subject")); } } From e2040afc32080680f7d106b61e09d32c0f228e74 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 11 Nov 2022 12:20:08 -0500 Subject: [PATCH 75/79] Add a toy example for `convert` command into the documentation. Signed-off-by: Daniel Danis --- docs/tutorial.rst | 75 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 4c7fbcae..c1af15f0 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -4,8 +4,8 @@ Tutorial ======== -This tutorial walks through the installation of *phenopacket-tools* and provides an overview -of the command-line interface functionality. +This tutorial walks through the installation of *phenopacket-tools* and provides an overview and an intended usage +of the command-line interface. The tutorial sections point to the parts of documentations which offer more detail. Setup ===== @@ -65,8 +65,41 @@ In response to this feedback, the schema was extended and refined and version 2 and published in 2022 by the International Standards Organization (ISO). The `convert` command of *phenopacket-tools* converts version 1 phenopackets into version 2. In this tutorial, -we will convert 384 v1 phenopackets published by Robinson et al., 2020\ [1]_ into version 2. The phenopackets -represent 384 individuals described in published case reports with Human Phenotype Ontology terms, +we will first convert an example v1 phenopacket and then 384 v1 phenopackets published by Robinson et al., 2020\ [1]_. + +A toy example +^^^^^^^^^^^^^ + +We will convert a phenopacket ``Schreckenbach-2014-TPM3-II.2.json`` that is bundled +in the *phenopacket-tools* distribution ZIP file. +The phenopacket can be found in `examples/convert` folder next to the executable JAR file. + +.. 
note:: + See :ref:`rsttutorialexamples` for detailed info of the example phenopackets. + +Due to differences between version 1 and 2, there are two ways to convert *v1* phenopackets into *v2*. +Briefly, the conversion either assumes that the `Variant`\ s are *causal* with respect to a `Disease` of the +v1 phenopacket, or skips conversion of `Variant`\ s altogether. The logic is controlled with the ``--convert-variants`` +CLI option and the conversion can be done iff the *v1* phenopacket has one `Disease`. + +.. note:: + See the :ref:`rstconverting` section for more information. + +Let's convert the phenopacket by running:: + + pxf convert -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json > Schreckenbach-2014-TPM3-II.2.v2.json + +The phenopacket represents a case report with several variants that are causal with respect to the disease. +Therefore, we can use ``--convert-variants`` to convert `Variant`\ s into a v2 `Interpretation` element:: + + pxf convert --convert-variants \ + -i ${examples}/convert/Schreckenbach-2014-TPM3-II.2.json > Schreckenbach-2014-TPM3-II.2.v2-with-variants.json + + +A real-life example +^^^^^^^^^^^^^^^^^^^ + +Let's convert 384 individuals described in published case reports with Human Phenotype Ontology terms, causal genetic variants, and OMIM disease identifiers. Let's start by downloading and unpacking the phenopacket dataset. @@ -76,12 +109,6 @@ a folder named as ``v1``:: curl -o phenopackets.v1.zip https://zenodo.org/record/3905420/files/phenopackets.zip unzip -d v1 phenopackets.v1.zip -Due to differences between version 1 and 2, there are two ways how to convert *v1* phenopackets into *v2*. -Briefly, the conversion either assumes that the `Variant`s are *causal* with respect to a `Disease` of the -v1 phenopacket, or skips conversion of `Variant`s altogether. The logic is controlled with ``--convert-variants`` -CLI option and the conversion can be done iff the *v1* phenopacket has one `Disease`. 
-See the :ref:`rstconverting` section for more information. - Let's convert all *v1* phenopackets and store the results in JSON format in a new folder ``v2``:: # Make the folder for converted phenopackets. @@ -102,7 +129,9 @@ Validate The `validate` command of *phenopacket-tools* validates correctness of phenopackets, families and cohorts. This section focuses on the *off-the-shelf* phenopacket validators. -See the :ref:`rstvalidation` and the `Java Documentation`_ to learn how to implement a custom validator. + +.. note:: + See the :ref:`rstvalidation` and the `Java Documentation`_ to learn how to implement a custom validator. We will work with a suite of phenopackets that are bundled in the *phenopacket-tools* distribution ZIP file. The phenopackets are located in `examples` folder next to the executable JAR file: @@ -115,12 +144,12 @@ The phenopackets are located in `examples` folder next to the executable JAR fil We will describe each validation and show an example validation errors and a proposed solution in a table. -.. note:: - The validation examples use `Phenopacket`\ s, but the validation functionality is available for all top-level Phenopacket Schema - elements, including `Cohort` and `Family`. -.. note:: - The validation is implemented for *v2* phenopackets only. The *v1* phenopackets must be converted to *v2* prior - running validation. + +The validation examples use `Phenopacket`\ s, but the validation functionality is available for all top-level Phenopacket Schema +elements, including `Cohort` and `Family`. + +The validation is implemented for *v2* phenopackets only. The *v1* phenopackets must be converted to *v2* prior +running validation. Base validation @@ -129,7 +158,8 @@ Base validation First, let's check if the phenopackets meet the base requirements, as described by the Phenopacket Schema. All phenopackets, regardless of their aim or scope must pass this requirement to be valid. -See :ref:`rstbasevalidation` for more details. +.. 
note:: + See :ref:`rstbasevalidation` for more details. All required fields must be present ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -198,7 +228,8 @@ Using the custom JSON schema via ``--require`` option will point out issues in t 'phenotypicFeatures[0].type.id' does not match the regex pattern ``^HP:\d{7}$``, Use Human Phenotype Ontology in `PhenotypicFeature`\ s 'subject.timeAtLastEncounter' is missing but it is required, Add the time at last encounter field -See :ref:`rstcustomvalidation` for more details. +.. note:: + See :ref:`rstcustomvalidation` for more details. .. _rstphenotypevalidationtutorial: @@ -213,7 +244,8 @@ The phenotype validation requires the Human Phenotype Ontology (HPO) file to wor The examples below assume that the latest HPO in JSON format has been downloaded to ``hp.json``. The HPO file can be downloaded from `HPO releases`_. -See :ref:`rstphenotypevalidation` for more details. +.. note:: + See :ref:`rstphenotypevalidation` for more details. Phenopackets use non-obsolete term IDs @@ -286,7 +318,8 @@ The `HpoOrganSystemValidator` will point out one error in the `marfan.missing-ey Missing annotation for Abnormality of the eye [HP:0000478] in id-C, Annotate the eye or exclude any abnormality. -See :ref:`rstorgsysvalidation` for more details. +.. note:: + See :ref:`rstorgsysvalidation` for more details. .. [1] https://pubmed.ncbi.nlm.nih.gov/32755546 From 490cb10e2986b342379ee4c453177d5cc5c6aaa1 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 11 Nov 2022 17:22:45 -0500 Subject: [PATCH 76/79] Add JavaDoc and extend the validation documentation. 
Signed-off-by: Daniel Danis --- docs/validation.rst | 88 +++++++++++-------- .../src/main/java/module-info.java | 4 + .../builder/builders/package-info.java | 4 + .../builder/constants/package-info.java | 5 ++ .../builder/package-info.java | 4 + .../src/main/java/module-info.java | 3 + .../converter/converters/package-info.java | 6 ++ .../src/main/java/module-info.java | 3 + .../phenopackettools/core/package-info.java | 10 +++ .../src/main/java/module-info.java | 3 + .../phenopackettools/io/package-info.java | 12 +++ .../src/main/java/module-info.java | 3 + .../util/format/package-info.java | 3 +- .../src/main/java/module-info.java | 3 + .../validator/core/ConversionException.java | 10 ++- .../validator/core/InputError.java | 27 ------ .../validator/core/PhenopacketValidator.java | 18 ++-- .../validator/core/ValidationLevel.java | 3 + .../validator/core/ValidationResult.java | 12 +++ .../validator/core/ValidationResults.java | 12 ++- .../core/ValidationWorkflowDispatcher.java | 77 +++++++++++++++- .../ValidationWorkflowDispatcherImpl.java | 33 ++++--- .../core/ValidationWorkflowRunner.java | 33 +++++-- .../core/ValidationWorkflowRunnerBuilder.java | 5 +- .../validator/core/ValidatorInfo.java | 2 +- .../validator/core/convert/BaseConverter.java | 7 +- .../validator/core/convert/package-info.java | 5 ++ .../validator/core/except/package-info.java | 4 + .../validator/core/package-info.java | 41 +++++++++ .../core/phenotype/HpoOrganSystems.java | 19 ++-- .../core/phenotype/package-info.java | 14 +++ .../validator/core/writer/package-info.java | 4 + .../src/main/java/module-info.java | 6 +- .../validator/jsonschema/package-info.java | 14 +++ 34 files changed, 379 insertions(+), 118 deletions(-) create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/package-info.java create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/package-info.java create mode 100644 
phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/package-info.java create mode 100644 phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/package-info.java create mode 100644 phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/package-info.java delete mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/InputError.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/package-info.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/package-info.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/package-info.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/package-info.java create mode 100644 phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/package-info.java diff --git a/docs/validation.rst b/docs/validation.rst index 40fcf2ee..b9b5ac62 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -5,49 +5,63 @@ Validating Phenopackets ======================= -Protobuf -^^^^^^^^ -Phenopackets schema uses protobuf, an exchange format developed -in 2008 by Google. We refer readers to the excellent +Phenopackets schema uses protobuf, an exchange format developed in 2008 by Google. We refer readers to the excellent `Wikipedia page `_ -on Protobuf and to `Google’s documentation `_ -for details. -In Protobuf (version 3, which is what the Phenopacket Schema uses), -all fields are optional. 
However, the Phenopacket Schema defines -certain fields to be optional +on Protobuf and to `Google’s documentation `_ for details. +In Protobuf (version 3, which is what the Phenopacket Schema uses), all fields are optional. +However, the Phenopacket Schema defines certain fields to be required (See `documentation `_ for details). -Also, a phenopacket message can be represented in native protobuf (binary) format, JSON, YAML, and -other formats. +Moreover, projects and consortia can require application of specific constraints and requirements for the phenopackets. -Validation -^^^^^^^^^^ +*Phenopacket-tools* provides functionality for validating phenopackets. -The *phenopacket-tools* library offers JSON-Schema-based and semantic validations. The syntactic validation -is done using JSON schema. Additionally, an interface is provided to perform arbitrary kinds of validation. -This validation should be performed for all phenopackets. +This document provides a comprehensive description of the functionality of the *off-the-shelf* validators +as well as the validation workflow API. -Additional constraints and requirements may be made for phenopackets that are used in a specific -project or for a specific collaboration or consortium. For instance, a rare-disease consortium -may require that all phenotypic features be recorded using valid HPO terms. An example class is -provided that checks all ``PhenotypicFeature`` elements, ensures that they use HPO terms with valid -(i.e., primary) id's, and checks whether both a term and an ancestor of the term are used - if so -a warning is emitted, because an annotation with a specific HPO term -(e.g., `Perimembranous ventricular septal defect `_) -implies all of the ancestors of the term (e.g., a patient with perimembranous VSD by necessity also has -`Ventricular septal defect `_). +Validation workflow +^^^^^^^^^^^^^^^^^^^ -API -~~~ +*Phenopacket-tools* defines an API for phenopacket validation workflow. 
The workflow consists of +a list of validation steps. There are two types of steps: *syntax* and *semantic*. The syntax steps check syntax +and cardinality of each component separately. The semantic validators are run after syntax checks and validate +the components in the context of the entire phenopacket. + +There is one mandatory syntax validation step that is always run first: the *base* validation. The base validation +ensures the phenopacket message meets the requirements of the Phenopacket Schema. + +The results of the validation are aggregated into a container object that consists +of immutable value objects that describe the performed validations and the validation results suitable +for reporting back to the user. + +.. Additional constraints and requirements may be made for phenopackets that are used in a specific + project or for a specific collaboration or consortium. For instance, a rare-disease consortium + may require that all phenotypic features be recorded using valid HPO terms. An example class is + provided that checks all ``PhenotypicFeature`` elements, ensures that they use HPO terms with valid + (i.e., primary) id's, and checks whether both a term and an ancestor of the term are used - if so + a warning is emitted, because an annotation with a specific HPO term + (e.g., `Perimembranous ventricular septal defect `_) + implies all of the ancestors of the term (e.g., a patient with perimembranous VSD by necessity also has + `Ventricular septal defect `_). + +**API** See the ``TODO - add JavaDoc link`` for the API documentation. .. TODO - refer to org.phenopackets.phenopackettools.validator.jsonschema module .. Describe validation workflow in general +*Off-the-shelf* validators +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. TODO - continue + +TODO - describe *off-the-shelf* validators in great detail. + .. 
_rstbasevalidation: Base validation -^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~ + All phenopackets should be tested against the base JSON Schema (analogously for all ``Family`` and ``Cohort`` messages). In code, this can be implemented as follows. @@ -70,8 +84,7 @@ In code, this can be implemented as follows. System.out.println("Error opening the phenopacket: " + e); } -API -~~~ +**API** See the ``TODO - add JavaDoc link`` for the API documentation. @@ -80,13 +93,12 @@ See the ``TODO - add JavaDoc link`` for the API documentation. .. _rstphenotypevalidation: Phenotype validation -^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~ TODO - write .. TODO - continue -API -~~~ +**API** See the ``TODO - add JavaDoc link`` for the API documentation. @@ -95,14 +107,13 @@ See the ``TODO - add JavaDoc link`` for the API documentation. .. _rstcustomvalidation: Custom validation -^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~ TODO - write .. TODO - continue -API -~~~ +**API** See the ``TODO - add JavaDoc link`` for the API documentation. @@ -112,7 +123,7 @@ See the ``TODO - add JavaDoc link`` for the API documentation. .. _rstorgsysvalidation: Organ system validation -^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~ TODO - write .. TODO - continue @@ -145,8 +156,7 @@ The annotation is done either by *excluding* the corresponding top-level HPO ter - `Abnormality of the respiratory system `_ - `Pneumothorax `_ -API -~~~ +**API** See the ``TODO - add JavaDoc link`` for the API documentation. diff --git a/phenopacket-tools-builder/src/main/java/module-info.java b/phenopacket-tools-builder/src/main/java/module-info.java index cd4375ea..123b5a56 100644 --- a/phenopacket-tools-builder/src/main/java/module-info.java +++ b/phenopacket-tools-builder/src/main/java/module-info.java @@ -1,3 +1,7 @@ +/** + * A module with pre-defined ontology constants, convenience methods, and concise builders + * to simplify phenopacket construction. 
+ */ module org.phenopackets.phenopackettools.builder { // No need to make it transitive since we only use runtime exceptions. requires org.phenopackets.phenopackettools.core; diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/package-info.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/package-info.java new file mode 100644 index 00000000..e493d218 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with convenience methods and builders for creating Phenopacket Schema building blocks. + */ +package org.phenopackets.phenopackettools.builder.builders; \ No newline at end of file diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/package-info.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/package-info.java new file mode 100644 index 00000000..62cc2bf2 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/package-info.java @@ -0,0 +1,5 @@ +/** + * The {@code org.phenopackets.phenopackettools.builder.constants} package provides pre-defined constants from + * the recommended ontologies. + */ +package org.phenopackets.phenopackettools.builder.constants; \ No newline at end of file diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/package-info.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/package-info.java new file mode 100644 index 00000000..42933cb3 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with builders for top-level elements of Phenopacket Schema. 
+ */ +package org.phenopackets.phenopackettools.builder; \ No newline at end of file diff --git a/phenopacket-tools-converter/src/main/java/module-info.java b/phenopacket-tools-converter/src/main/java/module-info.java index a825bd10..5d4a9fce 100644 --- a/phenopacket-tools-converter/src/main/java/module-info.java +++ b/phenopacket-tools-converter/src/main/java/module-info.java @@ -1,3 +1,6 @@ +/** + * A module for converting between the {@link org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion}s. + */ module org.phenopackets.phenopackettools.converter { requires transitive org.phenopackets.schema; requires org.phenopackets.phenopackettools.core; diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/package-info.java b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/package-info.java new file mode 100644 index 00000000..144d0c37 --- /dev/null +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/package-info.java @@ -0,0 +1,6 @@ +/** + * The package provides a {@link org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter} to convert + * from {@link org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion#V1} + * to {@link org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion#V2}. + */ +package org.phenopackets.phenopackettools.converter.converters; \ No newline at end of file diff --git a/phenopacket-tools-core/src/main/java/module-info.java b/phenopacket-tools-core/src/main/java/module-info.java index 5a3b93e6..ddf4c946 100644 --- a/phenopacket-tools-core/src/main/java/module-info.java +++ b/phenopacket-tools-core/src/main/java/module-info.java @@ -1,3 +1,6 @@ +/** + * The module defines core concepts shared by (almost) all modules of phenopacket-tools. 
+ */ module org.phenopackets.phenopackettools.core { exports org.phenopackets.phenopackettools.core; } \ No newline at end of file diff --git a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java index d88cf715..0aa8cba5 100644 --- a/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java +++ b/phenopacket-tools-core/src/main/java/org/phenopackets/phenopackettools/core/package-info.java @@ -1,4 +1,14 @@ /** * A package with constants and types used across the entire application, including the base exception classes. + *

+ * The package contains the base checked exception {@link org.phenopackets.phenopackettools.core.PhenopacketToolsException} + * and unchecked exception {@link org.phenopackets.phenopackettools.core.PhenopacketToolsRuntimeException}. + *

+ * Several useful enumerations complete the circle: + *

    + *
  • {@link org.phenopackets.phenopackettools.core.PhenopacketElement}
  • + *
  • {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}
  • + *
  • {@link org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion}
  • + *
*/ package org.phenopackets.phenopackettools.core; \ No newline at end of file diff --git a/phenopacket-tools-io/src/main/java/module-info.java b/phenopacket-tools-io/src/main/java/module-info.java index 07a662f6..d5534857 100644 --- a/phenopacket-tools-io/src/main/java/module-info.java +++ b/phenopacket-tools-io/src/main/java/module-info.java @@ -1,3 +1,6 @@ +/** + * A module for reading and writing top-level elements of Phenopacket Schema. + */ module org.phenopackets.phenopackettools.io { requires org.phenopackets.phenopackettools.util; diff --git a/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/package-info.java b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/package-info.java new file mode 100644 index 00000000..9d18e4ae --- /dev/null +++ b/phenopacket-tools-io/src/main/java/org/phenopackets/phenopackettools/io/package-info.java @@ -0,0 +1,12 @@ +/** + * The {@code org.phenopackets.phenopackettools.io} package offers functionality for reading and writing + * top-level elements of Phenopacket Schema. The elements can be (de)serialized in any of the supported + * {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + *

+ * The {@link org.phenopackets.phenopackettools.io.PhenopacketParserFactory} + * provides {@link org.phenopackets.phenopackettools.io.PhenopacketParser} for reading the schema elements. + *

+ * Use {@link org.phenopackets.phenopackettools.io.PhenopacketPrinterFactory} to get + * {@link org.phenopackets.phenopackettools.io.PhenopacketPrinter} for writing a top-level schema element. + */ +package org.phenopackets.phenopackettools.io; \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/module-info.java b/phenopacket-tools-util/src/main/java/module-info.java index f316d6a6..6c8793a5 100644 --- a/phenopacket-tools-util/src/main/java/module-info.java +++ b/phenopacket-tools-util/src/main/java/module-info.java @@ -1,3 +1,6 @@ +/** + * A module with utility functions. + */ module org.phenopackets.phenopackettools.util { requires transitive org.phenopackets.phenopackettools.core; requires org.slf4j; diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java index fecd687b..9e9da40d 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java @@ -1,4 +1,5 @@ /** - * Defines the supported phenopacket formats and utility methods for working with the formats. + * Defines utility methods for working with {@link org.phenopackets.phenopackettools.core.PhenopacketElement}s + * and {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. */ package org.phenopackets.phenopackettools.util.format; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/module-info.java b/phenopacket-tools-validator-core/src/main/java/module-info.java index b64606c4..0c5824ee 100644 --- a/phenopacket-tools-validator-core/src/main/java/module-info.java +++ b/phenopacket-tools-validator-core/src/main/java/module-info.java @@ -1,3 +1,6 @@ +/** + * Defines the base APIs for phenopacket validation. 
+ */ module org.phenopackets.phenopackettools.validator.core { exports org.phenopackets.phenopackettools.validator.core; diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java index 6d4250f7..c5d76cb8 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ConversionException.java @@ -3,7 +3,15 @@ import org.phenopackets.phenopackettools.core.PhenopacketToolsException; /** - * A {@link PhenopacketToolsException} that is thrown in case the provided data has incorrect format. + * A {@link PhenopacketToolsException} that is thrown by {@link org.phenopackets.phenopackettools.validator.core.convert.PhenopacketConverter} + * in case the provided data has incorrect format. + *

+ * This can happen if e.g. the {@code payload} to + * {@link org.phenopackets.phenopackettools.validator.core.convert.PhenopacketConverter#toJson(byte[])} + * is not valid JSON. + *

+ * {@code ConversionException} implements {@link ValidationResult} so that it can be reported + * by a {@link PhenopacketValidator}. */ public class ConversionException extends PhenopacketToolsException implements ValidationResult { diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/InputError.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/InputError.java deleted file mode 100644 index 0d93f1e4..00000000 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/InputError.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.phenopackets.phenopackettools.validator.core; - -/** - * {@link ValidationResult} returned when encountering a format error. - * @param message message to present the user. - */ -record InputError(String message) implements ValidationResult { - - private static final String VALIDATION_CATEGORY = "input"; - - - @Override - public ValidatorInfo validatorInfo() { - return ValidatorInfo.inputValidator(); - } - - @Override - public ValidationLevel level() { - return ValidationLevel.ERROR; - } - - @Override - public String category() { - return VALIDATION_CATEGORY; - } - -} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/PhenopacketValidator.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/PhenopacketValidator.java index dda03f96..6d38d083 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/PhenopacketValidator.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/PhenopacketValidator.java @@ -5,21 +5,21 @@ import java.util.List; /** - * {@link PhenopacketValidator} validates a top-level component of Phenopacket schema. - *

- * The top-level component must be one of the following types: - *

    - *
  • {@link org.phenopackets.schema.v2.Phenopacket}
  • - *
  • {@link org.phenopackets.schema.v2.Family}
  • - *
  • {@link org.phenopackets.schema.v2.Cohort}
  • - *
+ * {@link PhenopacketValidator} represents a single step of the validation workflow. + * The validator checks a top-level component of Phenopacket Schema. * - * @param type of the top-level component. + * @param type of the top-level element of the Phenopacket Schema. */ public interface PhenopacketValidator { + /** + * @return description of the validator and the validation logic. + */ ValidatorInfo validatorInfo(); + /** + * Validate the {@code component} and summarize the results into a {@link List} of {@link ValidationResult}s. + */ List validate(T component); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationLevel.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationLevel.java index 87fc622b..6958be59 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationLevel.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationLevel.java @@ -1,5 +1,8 @@ package org.phenopackets.phenopackettools.validator.core; +/** + * {@code ValidationLevel} represents a severity level for {@link ValidationResult}. 
+ */ public enum ValidationLevel { /** diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResult.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResult.java index d4e77eb3..f8193cd6 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResult.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResult.java @@ -1,19 +1,31 @@ package org.phenopackets.phenopackettools.validator.core; +/** + * {@code ValidationResult} contains results of a single validation step performed by a {@link PhenopacketValidator}. + */ public interface ValidationResult { + /** + * Create a {@link ValidationLevel#WARNING} result from given data. + */ static ValidationResult warning(ValidatorInfo validatorInfo, String category, String message) { return of(validatorInfo, ValidationLevel.WARNING, category, message); } + /** + * Create a {@link ValidationLevel#ERROR} result from given data. + */ static ValidationResult error(ValidatorInfo validatorInfo, String category, String message) { return of(validatorInfo, ValidationLevel.ERROR, category, message); } + /** + * Create a {@code ValidationResult} from given data. 
+ */ static ValidationResult of(ValidatorInfo validatorInfo, ValidationLevel level, String category, diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResults.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResults.java index debd46ac..da4aa633 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResults.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationResults.java @@ -4,9 +4,12 @@ import java.util.List; /** - * {@link ValidationResults} contain validation results for one Phenopacket schema top-level element - * (phenopacket, family, or cohort). - * The results contain info regarding which validators were run and the issues found during the validation. + * {@code ValidationResults} contain validation results for one Phenopacket schema top-level element + * ({@link org.phenopackets.schema.v2.Phenopacket}, {@link org.phenopackets.schema.v2.Family}, + * or {@link org.phenopackets.schema.v2.Cohort}). + *

+ * The results contain info regarding which validators were run ({@link #validators()}) and the issues found during + * the validation ({@link #validationResults()}). */ public interface ValidationResults { @@ -42,6 +45,9 @@ default boolean isValid() { return validationResults().isEmpty(); } + /** + * A builder for creating {@link ValidationResults}. + */ class Builder { private final List validators = new ArrayList<>(); diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcher.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcher.java index 4591051a..e90b9baf 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcher.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcher.java @@ -5,21 +5,96 @@ import org.phenopackets.schema.v2.Phenopacket; /** - * {@link ValidationWorkflowDispatcher} exposes endpoints for validating top-level elements of Phenopacket schema + * {@link ValidationWorkflowDispatcher} exposes endpoints for validating top-level elements of Phenopacket Schema * and dispatches the data into the appropriate {@link ValidationWorkflowRunner}. */ public interface ValidationWorkflowDispatcher { + static ValidationWorkflowDispatcher of(ValidationWorkflowRunner phenopacketValidationRunner, + ValidationWorkflowRunner familyValidationRunner, + ValidationWorkflowRunner cohortValidationRunner) { + return new ValidationWorkflowDispatcherImpl(phenopacketValidationRunner, familyValidationRunner, cohortValidationRunner); + } + + /** + * Validate a phenopacket starting from a pile of bytes. + * + * @param bytes that can represent a phenopacket in either + * of {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + * @return validation results. 
+ */ ValidationResults validatePhenopacket(byte[] bytes); + + /** + * Validate a phenopacket starting from a string. + * + * @param string that can represent a phenopacket either + * in {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML} format. + * @return validation results. + */ ValidationResults validatePhenopacket(String string); + + /** + * Validate a phenopacket starting from a protobuf object. + * + * @param phenopacket to be validated. + * @return validation results. + */ ValidationResults validatePhenopacket(Phenopacket phenopacket); + /** + * Validate a family starting from a pile of bytes. + * + * @param bytes that can represent a family in either + * of {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + * @return validation results. + */ ValidationResults validateFamily(byte[] bytes); + + /** + * Validate a family starting from a string. + * + * @param string that can represent a family either + * in {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML} format. + * @return validation results. + */ ValidationResults validateFamily(String string); + + /** + * Validate a family starting from a protobuf object. + * + * @param family to be validated. + * @return validation results. + */ ValidationResults validateFamily(Family family); + /** + * Validate a cohort starting from a pile of bytes. + * + * @param bytes that can represent a cohort in either + * of {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + * @return validation results. + */ ValidationResults validateCohort(byte[] bytes); + + /** + * Validate a cohort starting from a string. 
+ * + * @param string that can represent a cohort either + * in {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML} format. + * @return validation results. + */ ValidationResults validateCohort(String string); + + /** + * Validate a cohort starting from a protobuf object. + * + * @param cohort to be validated. + * @return validation results. + */ ValidationResults validateCohort(Cohort cohort); } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcherImpl.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcherImpl.java index 184399c8..c15a29fc 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcherImpl.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowDispatcherImpl.java @@ -10,24 +10,23 @@ * A {@link ValidationWorkflowDispatcher} implementation that uses 3 {@link ValidationWorkflowRunner} to validate * top-level elements of the Phenopacket schema. 
*/ -public class ValidationWorkflowDispatcherImpl implements ValidationWorkflowDispatcher { +class ValidationWorkflowDispatcherImpl implements ValidationWorkflowDispatcher { private final ValidationWorkflowRunner phenopacketValidationRunner; private final ValidationWorkflowRunner familyValidationRunner; private final ValidationWorkflowRunner cohortValidationRunner; - - public ValidationWorkflowDispatcherImpl(ValidationWorkflowRunner phenopacketValidationRunner, - ValidationWorkflowRunner familyValidationRunner, - ValidationWorkflowRunner cohortValidationRunner) { + ValidationWorkflowDispatcherImpl(ValidationWorkflowRunner phenopacketValidationRunner, + ValidationWorkflowRunner familyValidationRunner, + ValidationWorkflowRunner cohortValidationRunner) { this.phenopacketValidationRunner = Objects.requireNonNull(phenopacketValidationRunner); this.familyValidationRunner = Objects.requireNonNull(familyValidationRunner); this.cohortValidationRunner = Objects.requireNonNull(cohortValidationRunner); } @Override - public ValidationResults validatePhenopacket(Phenopacket phenopacket) { - return phenopacketValidationRunner.validate(phenopacket); + public ValidationResults validatePhenopacket(byte[] bytes) { + return phenopacketValidationRunner.validate(bytes); } @Override @@ -36,13 +35,13 @@ public ValidationResults validatePhenopacket(String string) { } @Override - public ValidationResults validatePhenopacket(byte[] bytes) { - return phenopacketValidationRunner.validate(bytes); + public ValidationResults validatePhenopacket(Phenopacket phenopacket) { + return phenopacketValidationRunner.validate(phenopacket); } @Override - public ValidationResults validateFamily(Family family) { - return familyValidationRunner.validate(family); + public ValidationResults validateFamily(byte[] bytes) { + return familyValidationRunner.validate(bytes); } @Override @@ -51,13 +50,13 @@ public ValidationResults validateFamily(String string) { } @Override - public ValidationResults 
validateFamily(byte[] bytes) { - return familyValidationRunner.validate(bytes); + public ValidationResults validateFamily(Family family) { + return familyValidationRunner.validate(family); } @Override - public ValidationResults validateCohort(Cohort cohort) { - return cohortValidationRunner.validate(cohort); + public ValidationResults validateCohort(byte[] bytes) { + return cohortValidationRunner.validate(bytes); } @Override @@ -66,8 +65,8 @@ public ValidationResults validateCohort(String string) { } @Override - public ValidationResults validateCohort(byte[] bytes) { - return cohortValidationRunner.validate(bytes); + public ValidationResults validateCohort(Cohort cohort) { + return cohortValidationRunner.validate(cohort); } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java index 76fc8a37..9671cbc7 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunner.java @@ -8,11 +8,14 @@ import java.util.List; /** - * {@link ValidationWorkflowRunner} validates selected top-level element of the Phenopacket schema. + * {@link ValidationWorkflowRunner} validates selected top-level element of the Phenopacket Schema. *

* The validation is performed on 3 input types: {@link #validate(MessageOrBuilder)} validates an existing top-level - * element, {@link #validate(String)} validates input formatted in JSON format, - * and {@link #validate(byte[])} validates bytes that can be either in JSON or Protobuf binary exchange format. + * element, {@link #validate(String)} validates input formatted either + * in {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML}, + * and {@link #validate(byte[])} validates a pile of bytes that can be in either + * of the {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. *

* Validator provides a list with {@link ValidatorInfo} that describes validations * done by the {@link ValidationWorkflowRunner}. @@ -24,7 +27,7 @@ * (e.g. a phenopacket contains an HPO term but an HPO {@link org.phenopackets.schema.v2.core.Resource} is missing * in {@link org.phenopackets.schema.v2.core.MetaData}). * - * @param type of the top-level element of the Phenopacket schema. + * @param type of the top-level element of the Phenopacket Schema. */ public interface ValidationWorkflowRunner { @@ -34,10 +37,30 @@ public interface ValidationWorkflowRunner { */ List validators(); + /** + * Validate a top-level element starting from a pile of bytes. + * + * @param payload top-level element in one of the {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + * @return the validation results. + */ ValidationResults validate(byte[] payload); - ValidationResults validate(String json); + /** + * Validate a top-level element starting from a string. + * + * @param value top-level element in either {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#JSON} + * or {@link org.phenopackets.phenopackettools.core.PhenopacketFormat#YAML}. + * @return the validation results. + */ + // TODO - include YAML validation. + ValidationResults validate(String value); + /** + * Validate a top-level element starting from a protobuf item. + * + * @param item the top-level element as protobuf item. + * @return the validation results. 
+ */ ValidationResults validate(T item); default ValidationResults validate(InputStream is) throws IOException { diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java index e4018af0..31d6263f 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidationWorkflowRunnerBuilder.java @@ -7,8 +7,9 @@ /** * The base builder for constructing {@link ValidationWorkflowRunner}. The builder keeps track of - * the syntax validators and semantic validators. - * @param + * the syntax and semantic validators. + * + * @param type of the top-level element of the Phenopacket Schema. */ public abstract class ValidationWorkflowRunnerBuilder { diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java index 99fd4a8b..3714b4ae 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/ValidatorInfo.java @@ -1,7 +1,7 @@ package org.phenopackets.phenopackettools.validator.core; /** - * Information regarding validator. + * A description of a {@link PhenopacketValidator}. 
*/ public interface ValidatorInfo { diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java index 8b5ffc80..5713bb52 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/BaseConverter.java @@ -4,18 +4,19 @@ import com.google.protobuf.MessageOrBuilder; import com.google.protobuf.util.JsonFormat; import org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverter; +import org.phenopackets.phenopackettools.validator.core.except.PhenopacketValidatorRuntimeException; abstract class BaseConverter implements PhenopacketFormatConverter { - protected final JsonFormat.Parser parser = JsonFormat.parser(); - protected final JsonFormat.Printer printer = JsonFormat.printer(); + protected static final JsonFormat.Parser parser = JsonFormat.parser(); + protected static final JsonFormat.Printer printer = JsonFormat.printer(); @Override public String toJson(T item) { try { return printer.print(item); } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(e); + throw new PhenopacketValidatorRuntimeException(e); } } diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/package-info.java new file mode 100644 index 00000000..ec95fd57 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/convert/package-info.java @@ -0,0 +1,5 @@ +/** + * A module-private package with {@link 
org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverter} + * implementations. + */ +package org.phenopackets.phenopackettools.validator.core.convert; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/package-info.java new file mode 100644 index 00000000..bbd53a90 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/except/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with exceptions that can be thrown by the validation code. + */ +package org.phenopackets.phenopackettools.validator.core.except; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java new file mode 100644 index 00000000..c82ea30b --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/package-info.java @@ -0,0 +1,41 @@ +/** + * The package provides APIs and default implementations of phenopacket validation. + *

+ *

Actors

+ * This section describes the actors of the validation workflow (the classes for representing behavior + * for "doing stuff"), starting from the basic elements. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverters} is a static factory class + * for providing {@link org.phenopackets.phenopackettools.validator.core.PhenopacketFormatConverter}s to convert + * the top-level elements of Phenopacket Schema between the supported + * {@link org.phenopackets.phenopackettools.core.PhenopacketFormat}s. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator} represents a single step + * of the validation workflow. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} applies + * the {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}s of the validation workflow in + * the correct order, ensuring the base validation is always run first. + * The {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} validates + * a top-level element. + *

+ * The {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowDispatcher} exposes methods + * for validating all top-level elements of the Phenopacket Schema. + * + *

Value objects

+ * The package includes stateful objects with no complex behavior, listed here starting from the most complex objects. + *

+ * The {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} + * and {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowDispatcher} return + * {@link org.phenopackets.phenopackettools.validator.core.ValidationResults}, a container with results + * of the validation workflow. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.ValidationResult} contains results of + * a single validation step. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.ValidatorInfo} describes + * the {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator}. + *

+ * {@link org.phenopackets.phenopackettools.validator.core.ValidationLevel} describes the level (e.g. warning or error) of a {@link org.phenopackets.phenopackettools.validator.core.ValidationResult}. + */ +package org.phenopackets.phenopackettools.validator.core; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java index 5ee91527..22554677 100644 --- a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/HpoOrganSystems.java @@ -3,16 +3,19 @@ import org.monarchinitiative.phenol.ontology.data.TermId; /** - * This class contains constants that correspond to the upper-level HPO organ-system phenotypic abnormalities. - * They can be used together with the {@link org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.OrganSystem} - * validators, which enforce that a phenopacket contains at least one term from a set of organ systems (observed or excluded). - * Note that users can also use any HPO term in this way -- the validator will enforce that the phenopacket has an HPO term that descends from it, - * but the most common use cases are these organ-level terms - *

{@code
- * Ontology hpo = ...;
+ * A class with constants that correspond to the upper-level HPO organ-system phenotypic abnormalities.
+ * 

+ * The constants can be used together with the + * {@link org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.OrganSystem} validators, + * which enforce that a phenopacket contains at least one term from a set of organ systems (observed or excluded). + *

+ * Note that users can also use any HPO term in this way -- the validator will enforce that the phenopacket + * has an HPO term that descends from it, but the most common use cases are these organ-level terms. + * + *

+ * Ontology hpo = ...; // get the ontology
  * var requiredOrganSystems = Set.of(BLOOD, CARDIOVASCULAR, SKELETAL);
  * var validator = HpoPhenotypeValidators.OrganSystem.phenopacketHpoOrganSystemValidator(hpo, requiredOrganSystems);
- * }
  * 
*/ public class HpoOrganSystems { diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/package-info.java new file mode 100644 index 00000000..7c983548 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/phenotype/package-info.java @@ -0,0 +1,14 @@ +/** + * Package with off-the-shelf validators that work with Human Phenotype Ontology (HPO). + *

+ * The validators are exposed via a static factory class; there is a method for getting a validator for each top-level + * Phenopacket Schema component. + *

+ * The package includes a utility class with HPO {@link org.monarchinitiative.phenol.ontology.data.TermId}s + * that correspond to organ systems + * (e.g. {@link org.phenopackets.phenopackettools.validator.core.phenotype.HpoOrganSystems#EYE} for + * Abnormality of the eye) that can be used + * in combination with + * {@link org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators.OrganSystem} validators. + */ +package org.phenopackets.phenopackettools.validator.core.phenotype; \ No newline at end of file diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/package-info.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/package-info.java new file mode 100644 index 00000000..07399df3 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/package-info.java @@ -0,0 +1,4 @@ +/** + * A package with API for serialization of {@link org.phenopackets.phenopackettools.validator.core.ValidationResults}. + */ +package org.phenopackets.phenopackettools.validator.core.writer; \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java index a70afb52..df165e96 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java @@ -1,5 +1,9 @@ /** - * The module provides a {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} implementation + * Defines a {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} with base + * validation backed by a JSON schema. + *

+ * The module provides {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner}, + * an implementation of {@link org.phenopackets.phenopackettools.validator.core.ValidationWorkflowRunner} * backed by a JSON schema validator. * * @see org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/package-info.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/package-info.java new file mode 100644 index 00000000..6e82c1a2 --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/package-info.java @@ -0,0 +1,14 @@ +/** + * The package provides {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner} + * and {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerBuilder} + * for validating phenopackets. + *

+ * The {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunnerBuilder} builds + * {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner} for validating + * {@link org.phenopackets.schema.v2.Phenopacket}, {@link org.phenopackets.schema.v2.Family} + * or {@link org.phenopackets.schema.v2.Cohort}. + *

+ * The {@link org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner} applies the + * validation steps on phenopacket elements. + */ +package org.phenopackets.phenopackettools.validator.jsonschema; \ No newline at end of file From 3eadc4b4f35c115c746601019d12fae68d0506c0 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sat, 12 Nov 2022 23:40:38 -0500 Subject: [PATCH 77/79] Update versions, add CHANGELOG.rst entry. Signed-off-by: Daniel Danis --- CHANGELOG.rst | 8 +++++--- docs/cli.rst | 6 ++---- docs/conf.py | 2 +- .../java/org/phenopackets/phenopackettools/cli/Main.java | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ddee17d4..a8f40949 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,12 +2,14 @@ Changelog ========= -latest ------- +0.4.7 +----- +* Add more predefined constants into the builder module, update the code for generating constants +* Write a tutorial with examples for validation and conversion functionalities * Add I/O module, implement YAML parser and printer * Add organ system validator -* Update generated code for the constants +* Finalize VRS-like validation * Let the user choose the CLI verbosity v0.4.6 diff --git a/docs/cli.rst b/docs/cli.rst index a1a4ce42..af4ae476 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -47,13 +47,10 @@ Run the following commands to check out the stable source code and to build the $ cd phenopacket-tools $ ./mvnw -Prelease package -After a successful build, a distribution ZIP file ``phenopacket-tools-cli-${project.version}-distribution.zip`` +After a successful build, a distribution ZIP file "phenopacket-tools-cli-|release|-distribution.zip" will be created in the ``phenopacket-tools-cli/target`` directory. Use the ZIP archive in the same way as the archive downloaded from *phenopacket-tools* releases. -.. note:: - Replace ``${project.version}`` with a given version (e.g. ``0.4.6``). 
- Commands ~~~~~~~~ @@ -186,3 +183,4 @@ Set up autocompletion .. TODO - write the section +TODO - write diff --git a/docs/conf.py b/docs/conf.py index 866a9c32..56357ef7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,7 +33,7 @@ # The short X.Y version. version = '0.4' # The full version, including alpha/beta/rc tags. -release = '0.4.7-SNAPSHOT' +release = '0.4.7' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java index c3a8f938..6041658a 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/Main.java @@ -26,7 +26,7 @@ public class Main { public static final String HEADER = "phenopacket-tools\nAn application for creating, converting and validating GA4GH phenopackets.\n"; - public static final String VERSION = "phenopacket-tools v0.4.7-SNAPSHOT"; + public static final String VERSION = "phenopacket-tools v0.4.7"; // Maximum number of characters in line of the usage message. public static final int USAGE_WIDTH = 120; From 148748dadf40fb56142da9ddc5b60ac820656921 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sat, 12 Nov 2022 23:41:06 -0500 Subject: [PATCH 78/79] Remove `supplementary folder`. 
Signed-off-by: Daniel Danis --- supplementary/README.md | 4 --- supplementary/hpo-rare-disease-schema.json | 41 ---------------------- 2 files changed, 45 deletions(-) delete mode 100644 supplementary/README.md delete mode 100644 supplementary/hpo-rare-disease-schema.json diff --git a/supplementary/README.md b/supplementary/README.md deleted file mode 100644 index afe140ed..00000000 --- a/supplementary/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# README - -Consider deleting the `supplementary` folder since another copy of the HPO rare disease schema is bundled -into distribution ZIP. diff --git a/supplementary/hpo-rare-disease-schema.json b/supplementary/hpo-rare-disease-schema.json deleted file mode 100644 index d56a28fd..00000000 --- a/supplementary/hpo-rare-disease-schema.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2019-09/schema#", - "$id": "example.hpo.jsonschema.validator", - "title": "HPO Rare Disease Phenopacket Schema", - "description": "HPO Rare Disease Schema for GA4GH Phenopacket", - "_comment": "Here we require the phenopacket to have the following elements that are not required by the default schema 1. subject (proband being investigated) 2. at least one phenotypicFeature element 3. time_at_last encounter (subelement of subject), representing the age of the proband. 
In addition, we require that Human Phenotype Ontology (HPO) terms are used to represent phenotypicFeature", - "type": "object", - "properties": { - "subject": { - "type": "object", - "description": "The subject element is required for a rare-disease Phenopacket", - "properties": { - "timeAtLastEncounter": { - "type": "object", - "description": "The time at last encounter is required for a rare-disease phenopacket" - } - }, - "required": [ - "timeAtLastEncounter" - ] - }, - "phenotypicFeatures": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "id": { - "type": "string", - "pattern": "^HP:\\([0-9]{7}$" - } - } - } - ] - } - }, - "required": [ - "subject", - "phenotypicFeatures" - ] -} \ No newline at end of file From 144aa94f698303d5afb50cf6083ace4a726a200c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Sat, 12 Nov 2022 23:46:06 -0500 Subject: [PATCH 79/79] Set versions to `v0.4.7`. Signed-off-by: Daniel Danis --- phenopacket-tools-builder/pom.xml | 2 +- phenopacket-tools-cli/pom.xml | 2 +- phenopacket-tools-converter/pom.xml | 2 +- phenopacket-tools-core/pom.xml | 2 +- phenopacket-tools-io/pom.xml | 2 +- phenopacket-tools-test/pom.xml | 2 +- phenopacket-tools-util/pom.xml | 2 +- phenopacket-tools-validator-core/pom.xml | 2 +- phenopacket-tools-validator-jsonschema/pom.xml | 2 +- pom.xml | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/phenopacket-tools-builder/pom.xml b/phenopacket-tools-builder/pom.xml index 39f876d0..5570812c 100644 --- a/phenopacket-tools-builder/pom.xml +++ b/phenopacket-tools-builder/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7-SNAPSHOT + 0.4.7 phenopacket-tools-builder diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index 7d345f6f..0622ae02 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7-SNAPSHOT + 0.4.7 
phenopacket-tools-cli diff --git a/phenopacket-tools-converter/pom.xml b/phenopacket-tools-converter/pom.xml index 0dd20d5e..2d48914e 100644 --- a/phenopacket-tools-converter/pom.xml +++ b/phenopacket-tools-converter/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7-SNAPSHOT + 0.4.7 phenopacket-tools-converter diff --git a/phenopacket-tools-core/pom.xml b/phenopacket-tools-core/pom.xml index 2b2ac661..46f0eab8 100644 --- a/phenopacket-tools-core/pom.xml +++ b/phenopacket-tools-core/pom.xml @@ -6,7 +6,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.7-SNAPSHOT + 0.4.7 phenopacket-tools-core diff --git a/phenopacket-tools-io/pom.xml b/phenopacket-tools-io/pom.xml index 7a33fdfa..3768f2eb 100644 --- a/phenopacket-tools-io/pom.xml +++ b/phenopacket-tools-io/pom.xml @@ -6,7 +6,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.7-SNAPSHOT + 0.4.7 phenopacket-tools-io diff --git a/phenopacket-tools-test/pom.xml b/phenopacket-tools-test/pom.xml index 8d9730f5..d0f5ce0e 100644 --- a/phenopacket-tools-test/pom.xml +++ b/phenopacket-tools-test/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.7-SNAPSHOT + 0.4.7 4.0.0 diff --git a/phenopacket-tools-util/pom.xml b/phenopacket-tools-util/pom.xml index e595d0f1..7bb365de 100644 --- a/phenopacket-tools-util/pom.xml +++ b/phenopacket-tools-util/pom.xml @@ -5,7 +5,7 @@ phenopacket-tools org.phenopackets.phenopackettools - 0.4.7-SNAPSHOT + 0.4.7 4.0.0 diff --git a/phenopacket-tools-validator-core/pom.xml b/phenopacket-tools-validator-core/pom.xml index 52bb7664..97b8b750 100644 --- a/phenopacket-tools-validator-core/pom.xml +++ b/phenopacket-tools-validator-core/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7-SNAPSHOT + 0.4.7 phenopacket-tools-validator-core diff --git a/phenopacket-tools-validator-jsonschema/pom.xml b/phenopacket-tools-validator-jsonschema/pom.xml index b27e0686..10ef5f3d 100644 --- 
a/phenopacket-tools-validator-jsonschema/pom.xml +++ b/phenopacket-tools-validator-jsonschema/pom.xml @@ -7,7 +7,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7-SNAPSHOT + 0.4.7 phenopacket-tools-validator-jsonschema diff --git a/pom.xml b/pom.xml index ef98b259..38051a77 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.phenopackets.phenopackettools phenopacket-tools - 0.4.7-SNAPSHOT + 0.4.7 pom