From cd8055b4b637aa4f8f2e91b7d2024b9cacea882a Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 19 Apr 2024 12:29:33 +0200 Subject: [PATCH 1/6] first commit translations --- .../phenopacket2prompt/Main.java | 1 + .../cmd/GptTranslateCommand.java | 39 +++++++++++++ .../cmd/OntoGptCommand.java | 2 +- .../model/ppkt/PpktIndividual.java | 46 +++++++++++++++ .../model/PpktIndividualTest.java | 56 +++++++++++++++++++ 5 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/Main.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/Main.java index 48c5c88..e1856b0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/Main.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/Main.java @@ -17,6 +17,7 @@ public static void main(String[] args){ CommandLine cline = new CommandLine(new Main()) .addSubcommand("download", new DownloadCommand()) .addSubcommand("gpt", new OntoGptCommand()) + .addSubcommand("translate", new GptTranslateCommand()) ; cline.setToggleBooleanFlags(false); int exitCode = cline.execute(args); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java new file mode 100644 index 0000000..ba8b1b2 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -0,0 +1,39 @@ +package org.monarchinitiative.phenopacket2prompt.cmd; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.io.OntologyLoader; +import 
org.monarchinitiative.phenol.ontology.data.Ontology; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.File; +import java.util.concurrent.Callable; + + +@CommandLine.Command(name = "translate", aliases = {"T"}, + mixinStandardHelpOptions = true, + description = "Translate phenopackets and output prompts") +public class GptTranslateCommand implements Callable { + Logger LOGGER = LoggerFactory.getLogger(GptTranslateCommand.class); + + + @CommandLine.Option(names = {"--hp"}, + description = "path to HP json file") + private String hpoJsonPath = "data/hp.json"; + + + @Override + public Integer call() throws Exception { + File hpJsonFile = new File(hpoJsonPath); + if (! hpJsonFile.isFile()) { + throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath()); + } + Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); + LOGGER.info("HPO version {}", hpo.version().orElse("n/a")); + System.out.println(hpo.version().orElse("n/a")); + + + return 0; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java index 5a05e97..1e9d1f3 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java @@ -14,7 +14,7 @@ import static org.monarchinitiative.phenopacket2prompt.querygen.QueryOutputType.*; -@CommandLine.Command(name = "gpt-time", aliases = {"T"}, +@CommandLine.Command(name = "gpt-time", aliases = {"G"}, mixinStandardHelpOptions = true, description = "Create GPT time-course prompt") public class OntoGptCommand implements Callable { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java new file mode 100644 index 0000000..6e44ca9 
--- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java @@ -0,0 +1,46 @@ +package org.monarchinitiative.phenopacket2prompt.model.ppkt; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.phenopackets.schema.v2.Phenopacket; +import com.google.protobuf.util.JsonFormat; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; + +public class PpktIndividual { + Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); + + private final Phenopacket ppkt; + + private final String phenopacketId; + + + public PpktIndividual(File ppktJsonFile) { + JSONParser parser = new JSONParser(); + try { + Object obj = parser.parse(new FileReader(ppktJsonFile)); + JSONObject jsonObject = (JSONObject) obj; + String phenopacketJsonString = jsonObject.toJSONString(); + Phenopacket.Builder phenoPacketBuilder = Phenopacket.newBuilder(); + JsonFormat.parser().merge(phenopacketJsonString, phenoPacketBuilder); + this.ppkt = phenoPacketBuilder.build(); + } catch (IOException | ParseException e1) { + LOGGER.error("Could not ingest phenopacket: {}", e1.getMessage()); + throw new PhenolRuntimeException("Could not load phenopacket at " + ppktJsonFile); + } + this.phenopacketId = ppkt.getId(); + } + + public String getPhenopacketId() { + return phenopacketId; + } + + + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java new file mode 100644 index 0000000..2e42d8e --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java @@ -0,0 +1,56 @@ +package org.monarchinitiative.phenopacket2prompt.model; + +import org.junit.Test; +import 
org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.monarchinitiative.phenopacket2prompt.model.ppkt.PpktIndividual; + +import java.io.File; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class PpktIndividualTest { + + + private static PpktIndividual ppktIndividual; + + + @BeforeAll + public static void init() { + String ppktPath = "/data/GCDH_test_ppkt.json"; + // ClassLoader classLoader = PpktIndividualTest.class.getClassLoader(); + // File file = new File(Objects.requireNonNull(classLoader.getResource(ppktPath)).getFile()); + Path resourceDirectory = Paths.get("src","test","resources", "data", "GCDH_test_ppkt.json"); + File file = resourceDirectory.toFile(); + ppktIndividual = new PpktIndividual(file); + System.out.println(ppktIndividual); + } + + @Test + public void testCTOR() { + Path resourceDirectory = Paths.get("src","test","resources", "data", "GCDH_test_ppkt.json"); + File file = resourceDirectory.toFile(); + PpktIndividual i = new PpktIndividual(file); + System.out.println(ppktIndividual); + Assertions.assertNotNull(ppktIndividual); + } + + + @Test + public void testPhenopacketId() { + Path resourceDirectory = Paths.get("src","test","resources", "data", "GCDH_test_ppkt.json"); + File file = resourceDirectory.toFile(); + PpktIndividual i = new PpktIndividual(file); + + + String expected = "PMID_27672653_Individual_1"; + assertEquals(expected, i.getPhenopacketId()); + } + + + + + +} From 0a8ff691194c33af1294375d69434ec3469d8d87 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 22 Apr 2024 17:42:02 +0200 Subject: [PATCH 2/6] extracting ppkt --- .../model/ppkt/HpoOnsetAge.java | 25 ++ .../model/ppkt/Iso8601Age.java | 22 ++ .../model/ppkt/OntologyTerm.java | 47 ++++ .../model/ppkt/PhenopacketAge.java | 10 + .../model/ppkt/PhenopacketAgeType.java | 6 + .../model/ppkt/PhenopacketDisease.java | 22 ++ .../model/ppkt/PhenopacketSex.java | 8 
+ .../model/ppkt/PpktIndividual.java | 74 +++++ .../model/PpktIndividualTest.java | 85 ++++-- src/test/resources/data/GCDH_test_ppkt.json | 252 ++++++++++++++++++ 10 files changed, 526 insertions(+), 25 deletions(-) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/HpoOnsetAge.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/Iso8601Age.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/OntologyTerm.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAge.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAgeType.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketDisease.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketSex.java create mode 100644 src/test/resources/data/GCDH_test_ppkt.json diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/HpoOnsetAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/HpoOnsetAge.java new file mode 100644 index 0000000..a448da8 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/HpoOnsetAge.java @@ -0,0 +1,25 @@ +package org.monarchinitiative.phenopacket2prompt.model.ppkt; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +public class HpoOnsetAge implements PhenopacketAge { + + private final TermId tid; + private final String label; + + + public HpoOnsetAge(String id, String label) { + this.tid = TermId.of(id); + this.label = label; + } + + @Override + public String age() { + return label; + } + + @Override + public PhenopacketAgeType ageType() { + return PhenopacketAgeType.HPO_ONSET_AGE_TYPE; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/Iso8601Age.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/Iso8601Age.java new file mode 100644 index 0000000..7b8937b --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/Iso8601Age.java @@ -0,0 +1,22 @@ +package org.monarchinitiative.phenopacket2prompt.model.ppkt; + +public class Iso8601Age implements PhenopacketAge { + + private final String iso8601; + + public Iso8601Age(String iso) { + iso8601 = iso; + } + + + + @Override + public String age() { + return iso8601; + } + + @Override + public PhenopacketAgeType ageType() { + return PhenopacketAgeType.ISO8601_AGE_TYPE; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/OntologyTerm.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/OntologyTerm.java new file mode 100644 index 0000000..16ffb8c --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/OntologyTerm.java @@ -0,0 +1,47 @@ +package org.monarchinitiative.phenopacket2prompt.model.ppkt; + +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.core.Disease; + +import java.util.Optional; + +public class OntologyTerm { + + private final TermId tid; + private final String label; + private final boolean excluded; + private final PhenopacketAge age; + + public OntologyTerm(TermId tid, String label, boolean excluded, PhenopacketAge age) { + this.tid = tid; + this.label = label; + this.excluded = excluded; + this.age = age; + + } + + public OntologyTerm(TermId tid, String label, boolean excluded) { + this(tid, label, excluded, null); + + } + public OntologyTerm(TermId tid, String label) { + this(tid, label, false); + } + + public TermId getTid() { + return tid; + } + + public String getLabel() { + return label; + } + + public boolean isExcluded() { + return excluded; + } + + public Optional getAgeOpt() { + return Optional.ofNullable(age); + } + +} diff --git 
a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAge.java new file mode 100644 index 0000000..6e245e9 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAge.java @@ -0,0 +1,10 @@ +package org.monarchinitiative.phenopacket2prompt.model.ppkt; + +public interface PhenopacketAge { + + String age(); + PhenopacketAgeType ageType(); + + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAgeType.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAgeType.java new file mode 100644 index 0000000..bee006c --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAgeType.java @@ -0,0 +1,6 @@ +package org.monarchinitiative.phenopacket2prompt.model.ppkt; + +public enum PhenopacketAgeType { + + ISO8601_AGE_TYPE, HPO_ONSET_AGE_TYPE; +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketDisease.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketDisease.java new file mode 100644 index 0000000..cdd1fb0 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketDisease.java @@ -0,0 +1,22 @@ +package org.monarchinitiative.phenopacket2prompt.model.ppkt; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +public class PhenopacketDisease { + + private final TermId diseaseId; + private final String label; + + public TermId getDiseaseId() { + return diseaseId; + } + + public String getLabel() { + return label; + } + + public PhenopacketDisease(String diseaseId, String label) { + this.diseaseId = TermId.of(diseaseId); + this.label = label; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketSex.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketSex.java new file mode 100644 index 0000000..ad2c343 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketSex.java @@ -0,0 +1,8 @@ +package org.monarchinitiative.phenopacket2prompt.model.ppkt; + +public enum PhenopacketSex { + + FEMALE, MALE, OTHER, UNKNOWN; + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java index 6e44ca9..d7b66f7 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java @@ -1,17 +1,22 @@ package org.monarchinitiative.phenopacket2prompt.model.ppkt; import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.data.TermId; import org.phenopackets.schema.v2.Phenopacket; import com.google.protobuf.util.JsonFormat; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; +import org.phenopackets.schema.v2.core.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileReader; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; public class PpktIndividual { Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); @@ -41,6 +46,75 @@ public String getPhenopacketId() { return phenopacketId; } + public PhenopacketSex getSex() { + Sex sex = ppkt.getSubject().getSex(); + return switch (sex) { + case MALE -> PhenopacketSex.MALE; + case FEMALE -> PhenopacketSex.FEMALE; + case OTHER_SEX -> PhenopacketSex.OTHER; + default -> PhenopacketSex.UNKNOWN; + }; + } + private Optional getAgeFromTimeElement(TimeElement telem) { + if (telem.hasAge()) { + return 
Optional.of(new Iso8601Age(telem.getAge().getIso8601Duration())); + } else if (telem.hasOntologyClass()) { + OntologyClass clz = telem.getOntologyClass(); + return Optional.of(new HpoOnsetAge(clz.getId(), clz.getLabel())); + } else { + return Optional.empty(); + } + } + + public Optional getAgeAtLastExamination() { + if (ppkt.getSubject().hasTimeAtLastEncounter()) { + TimeElement telem = ppkt.getSubject().getTimeAtLastEncounter(); + return getAgeFromTimeElement(telem); + } + return Optional.empty(); + } + + + public Optional getAgeAtOnset() { + if (ppkt.getDiseasesCount() == 1) { + Disease dx = ppkt.getDiseases(0); + if (dx.hasOnset()) { + TimeElement telem = dx.getOnset(); + return getAgeFromTimeElement(telem); + } + } + return Optional.empty(); + } + + + public List getDiseases() { + List diseases = new ArrayList<>(); + for (Disease d : ppkt.getDiseasesList()) { + if (d.getExcluded()) continue; + diseases.add(new PhenopacketDisease(d.getTerm().getId(), d.getTerm().getLabel())); + } + return diseases; + } + + public List getPhenotypicFeatures() { + List termList = new ArrayList<>(); + for (var pf : ppkt.getPhenotypicFeaturesList()) { + TermId hpoId = TermId.of(pf.getType().getId()); + String label = pf.getType().getLabel(); + boolean excluded = pf.getExcluded(); + Optional opt = Optional.empty(); + if (pf.hasOnset()) { + TimeElement telem = pf.getOnset(); + opt = getAgeFromTimeElement(telem); + } + if (opt.isPresent()) { + termList.add(new OntologyTerm(hpoId, label, excluded, opt.get())); + } else { + termList.add(new OntologyTerm(hpoId, label, excluded)); + } + } + return termList; + } } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java index 2e42d8e..0479f1f 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java +++ 
b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java @@ -2,55 +2,90 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.monarchinitiative.phenopacket2prompt.model.ppkt.PpktIndividual; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.monarchinitiative.phenopacket2prompt.model.ppkt.*; import java.io.File; -import java.nio.file.Path; -import java.nio.file.Paths; +import java.net.URL; +import java.util.List; +import java.util.Optional; +import java.util.function.Predicate; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; public class PpktIndividualTest { - private static PpktIndividual ppktIndividual; + private static final String ppktPath = "data/GCDH_test_ppkt.json"; + private static final ClassLoader classLoader = PpktIndividualTest.class.getClassLoader(); + private static final URL resource = (classLoader.getResource(ppktPath)); + private static final File file = new File(resource.getFile()); + private static final PpktIndividual ppktIndividual = new PpktIndividual(file); - @BeforeAll - public static void init() { - String ppktPath = "/data/GCDH_test_ppkt.json"; - // ClassLoader classLoader = PpktIndividualTest.class.getClassLoader(); - // File file = new File(Objects.requireNonNull(classLoader.getResource(ppktPath)).getFile()); - Path resourceDirectory = Paths.get("src","test","resources", "data", "GCDH_test_ppkt.json"); - File file = resourceDirectory.toFile(); - ppktIndividual = new PpktIndividual(file); - System.out.println(ppktIndividual); - } @Test public void testCTOR() { - Path resourceDirectory = Paths.get("src","test","resources", "data", "GCDH_test_ppkt.json"); - File file = resourceDirectory.toFile(); - PpktIndividual i = new PpktIndividual(file); - System.out.println(ppktIndividual); Assertions.assertNotNull(ppktIndividual); } @Test public void testPhenopacketId() { - 
Path resourceDirectory = Paths.get("src","test","resources", "data", "GCDH_test_ppkt.json"); - File file = resourceDirectory.toFile(); - PpktIndividual i = new PpktIndividual(file); + String expected = "PMID_27672653_Individual_1"; + assertEquals(expected, ppktIndividual.getPhenopacketId()); + } - String expected = "PMID_27672653_Individual_1"; - assertEquals(expected, i.getPhenopacketId()); + @Test + public void testPhenopacketSex() { + assertEquals(PhenopacketSex.MALE, ppktIndividual.getSex()); } + @Test + public void testPhenopacketAgeLastEncounter() { + Optional opt = ppktIndividual.getAgeAtLastExamination(); + assertTrue(opt.isPresent()); + PhenopacketAge ppktAge = opt.get(); + assertEquals(PhenopacketAgeType.ISO8601_AGE_TYPE, ppktAge.ageType()); + String iso = ppktAge.age(); + assertEquals("P20Y", iso); + } + + @Test + public void testPhenopacketOnset() { + Optional opt = ppktIndividual.getAgeAtOnset(); + assertTrue(opt.isPresent()); + PhenopacketAge onsetAge = opt.get(); + assertEquals(PhenopacketAgeType.ISO8601_AGE_TYPE, onsetAge.ageType()); + String iso = onsetAge.age(); + assertEquals("P5M", iso); + } + @Test + public void testPhenopacketDisease() { + List diseases = ppktIndividual.getDiseases(); + assertEquals(1, diseases.size()); + PhenopacketDisease disease = diseases.get(0); + TermId expectedId = TermId.of("OMIM:231670"); + String expectedLabel = "Glutaricaciduria, type I"; + assertEquals(expectedId, disease.getDiseaseId()); + assertEquals(expectedLabel, disease.getLabel()); + } + @Test + public void testPhenotypicFeatures() { + List ppktFeatures = ppktIndividual.getPhenotypicFeatures(); + assertFalse(ppktFeatures.isEmpty()); + Predicate termPredicate = term -> term.getLabel().equals("Cerebral atrophy"); + Optional opt = ppktFeatures.stream().filter(termPredicate).findAny(); + assertTrue(opt.isPresent()); + OntologyTerm term = opt.get(); + assertEquals("Cerebral atrophy", term.getLabel()); + TermId expectedId = TermId.of("HP:0002059"); + 
assertEquals(expectedId, term.getTid()); + assertTrue(term.isExcluded()); + } } diff --git a/src/test/resources/data/GCDH_test_ppkt.json b/src/test/resources/data/GCDH_test_ppkt.json new file mode 100644 index 0000000..825ec9a --- /dev/null +++ b/src/test/resources/data/GCDH_test_ppkt.json @@ -0,0 +1,252 @@ +{ + "id": "PMID_27672653_Individual_1", + "subject": { + "id": "Individual 1", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P20Y" + } + }, + "sex": "MALE" + }, + "phenotypicFeatures": [ + { + "type": { + "id": "HP:0006846", + "label": "Acute encephalopathy" + } + }, + { + "type": { + "id": "HP:0034656", + "label": "Elevated urine 3-hydroxyglutaric level" + } + }, + { + "type": { + "id": "HP:0003150", + "label": "Glutaric aciduria" + } + }, + { + "type": { + "id": "HP:0002540", + "label": "Inability to walk" + } + }, + { + "type": { + "id": "HP:0001332", + "label": "Dystonia" + } + }, + { + "type": { + "id": "HP:0100952", + "label": "Enlarged sylvian cistern" + } + }, + { + "type": { + "id": "HP:0001250", + "label": "Seizure" + } + }, + { + "type": { + "id": "HP:0000256", + "label": "Macrocephaly" + } + }, + { + "type": { + "id": "HP:0000737", + "label": "Irritability" + } + }, + { + "type": { + "id": "HP:0100309", + "label": "Subdural hemorrhage" + } + }, + { + "type": { + "id": "HP:0001344", + "label": "Absent speech" + } + }, + { + "type": { + "id": "HP:0002059", + "label": "Cerebral atrophy" + }, + "excluded": true + }, + { + "type": { + "id": "HP:0000238", + "label": "Hydrocephalus" + }, + "excluded": true + }, + { + "type": { + "id": "HP:0002376", + "label": "Developmental regression" + }, + "excluded": true + } + ], + "interpretations": [ + { + "id": "Individual 1", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "OMIM:231670", + "label": "Glutaricaciduria, type I" + }, + "genomicInterpretations": [ + { + "subjectOrBiosampleId": "Individual 1", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + 
"variationDescriptor": { + "id": "var_gVfYFxrxtnRihuATBctbrhxNF", + "geneContext": { + "valueId": "HGNC:4189", + "symbol": "GCDH" + }, + "expressions": [ + { + "syntax": "hgvs.c", + "value": "NM_000159.4:c.892G>A" + }, + { + "syntax": "hgvs.g", + "value": "NC_000019.10:g.12896949G>A" + } + ], + "vcfRecord": { + "genomeAssembly": "hg38", + "chrom": "chr19", + "pos": "12896949", + "ref": "G", + "alt": "A" + }, + "moleculeContext": "genomic", + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }, + { + "subjectOrBiosampleId": "Individual 1", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "variationDescriptor": { + "id": "var_yyEZkMbLCZQqHWPMwYoeOZwvP", + "geneContext": { + "valueId": "HGNC:4189", + "symbol": "GCDH" + }, + "expressions": [ + { + "syntax": "hgvs.c", + "value": "NM_000159.4:c.1244-2A>C" + }, + { + "syntax": "hgvs.g", + "value": "NC_000019.10:g.12899466A>C" + } + ], + "vcfRecord": { + "genomeAssembly": "hg38", + "chrom": "chr19", + "pos": "12899466", + "ref": "A", + "alt": "C" + }, + "moleculeContext": "genomic", + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + } + ] + } + } + ], + "diseases": [ + { + "term": { + "id": "OMIM:231670", + "label": "Glutaricaciduria, type I" + }, + "onset": { + "age": { + "iso8601duration": "P5M" + } + } + } + ], + "metaData": { + "created": "2024-04-19T07:24:51.923413991Z", + "createdBy": "ORCID:0000-0002-0736-9199", + "resources": [ + { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "2022-03-05", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, + { + "id": "hgnc", + "name": "HUGO Gene Nomenclature Committee", + "url": "https://www.genenames.org", + "version": "06/01/23", + "namespacePrefix": "HGNC", + "iriPrefix": "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/" + }, + { + "id": "omim", + "name": "An Online Catalog of 
Human Genes and Genetic Disorders", + "url": "https://www.omim.org", + "version": "January 4, 2023", + "namespacePrefix": "OMIM", + "iriPrefix": "https://www.omim.org/entry/" + }, + { + "id": "so", + "name": "Sequence types and features ontology", + "url": "http://purl.obolibrary.org/obo/so.obo", + "version": "2021-11-22", + "namespacePrefix": "SO", + "iriPrefix": "http://purl.obolibrary.org/obo/SO_" + }, + { + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2024-04-04", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + } + ], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [ + { + "id": "PMID:27672653", + "reference": "https://pubmed.ncbi.nlm.nih.gov/27672653", + "description": "Clinical and Mutational Analysis of the GCDH Gene in Malaysian Patients with Glutaric Aciduria Type 1" + } + ] + } +} \ No newline at end of file From 29bccf1c9d6bd74ba63f59fa63543eb4f4a34b45 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 24 Apr 2024 09:44:54 +0200 Subject: [PATCH 3/6] refactoring output --- pom.xml | 2 +- .../cmd/GptTranslateCommand.java | 10 +- .../cmd/OntoGptCommand.java | 2 +- .../{model => legacy}/AdditionalConcept.java | 2 +- .../{model => legacy}/AdditionalConceptI.java | 2 +- .../AdditionalConceptType.java | 2 +- .../AdditionalReplacementConceptType.java | 2 +- .../{model => legacy}/TimeSegment.java | 2 +- .../{ => legacy}/nejm/Dehyphenizer.java | 2 +- .../nejm/NejmCaseReportFromPdfFilterer.java | 8 +- .../nejm/NejmCaseReportImporter.java | 2 +- .../nejm/NejmCaseReportIngestor.java | 2 +- .../model/AgeNotSpecified.java | 41 +++++++ .../phenopacket2prompt/model/HpoOnsetAge.java | 83 ++++++++++++++ .../model/HpoPhenotypicFeaturesAtAge.java | 4 + .../phenopacket2prompt/model/Iso8601Age.java | 101 +++++++++++++++++ .../model/{ppkt => }/OntologyTerm.java | 3 +- .../model/PhenopacketAge.java | 19 ++++ .../model/PhenopacketAgeType.java | 6 + 
.../model/{ppkt => }/PhenopacketDisease.java | 2 +- .../model/{ppkt => }/PhenopacketSex.java | 2 +- .../model/{ppkt => }/PpktIndividual.java | 24 ++-- .../model/ppkt/HpoOnsetAge.java | 25 ---- .../model/ppkt/Iso8601Age.java | 22 ---- .../model/ppkt/PhenopacketAge.java | 10 -- .../model/ppkt/PhenopacketAgeType.java | 6 - .../output/PhenopacketAgeGenerator.java | 12 ++ .../output/PhenopacketSexGenerator.java | 11 ++ .../output/PhenopacketTextGenerator.java | 35 ++++++ .../PpktPhenotypicFeatureGenerator.java | 44 +++++++ .../output/PromptGenerator.java | 40 +++++++ .../output/impl/EnglishPromptGenerator.java | 107 ++++++++++++++++++ .../impl/english/PhenopacketSexEnglish.java | 58 ++++++++++ .../output/impl/english/PpktAgeEnglish.java | 45 ++++++++ .../english/PpktPhenotypicfeatureEnglish.java | 26 +++++ .../output/impl/english/PpktTextEnglish.java | 37 ++++++ .../querygen/PhenopacketFactoryIngestor.java | 2 +- .../querygen/QueryPromptFactory.java | 2 +- .../qfactory/AbstractQueryGenerator.java | 4 +- .../qfactory/PhenopacketOnlyQuery.java | 2 +- .../querygen/qfactory/QcQueryGenerator.java | 2 +- .../TextWithManualAnnotsGenerator.java | 6 +- .../qfactory/TextWithoutDiscussionQuery.java | 2 +- .../nejm/DehyphenizerTest.java | 1 + 44 files changed, 721 insertions(+), 101 deletions(-) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{model => legacy}/AdditionalConcept.java (90%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{model => legacy}/AdditionalConceptI.java (95%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{model => legacy}/AdditionalConceptType.java (95%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{model => legacy}/AdditionalReplacementConceptType.java (93%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{model => legacy}/TimeSegment.java (87%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/nejm/Dehyphenizer.java (96%) rename 
src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/nejm/NejmCaseReportFromPdfFilterer.java (96%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/nejm/NejmCaseReportImporter.java (98%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/nejm/NejmCaseReportIngestor.java (98%) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoPhenotypicFeaturesAtAge.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java rename src/main/java/org/monarchinitiative/phenopacket2prompt/model/{ppkt => }/OntologyTerm.java (89%) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAgeType.java rename src/main/java/org/monarchinitiative/phenopacket2prompt/model/{ppkt => }/PhenopacketDisease.java (87%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/model/{ppkt => }/PhenopacketSex.java (53%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/model/{ppkt => }/PpktIndividual.java (81%) delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/HpoOnsetAge.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/Iso8601Age.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAge.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAgeType.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeGenerator.java create mode 100644 
src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketSexGenerator.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/EnglishPromptGenerator.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PhenopacketSexEnglish.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeEnglish.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java diff --git a/pom.xml b/pom.xml index e4a6135..bd1f0bf 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.monarchinitiative phenopacket2prompt - 0.3.7 + 0.3.8 phenopacket2prompt https://github.com/monarch-initiative/phenopacket2prompt diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index ba8b1b2..ccd4f7d 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -3,6 +3,8 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine; @@ -22,6 +24,9 @@ public class GptTranslateCommand implements Callable { description = "path to HP json file") private String hpoJsonPath = "data/hp.json"; + @CommandLine.Option(names = {"-p", "--ppkt"}, description = "Path to JSON phenopacket file", required = true) + private String ppkt; + @Override public Integer call() throws Exception { @@ -32,7 +37,10 @@ public Integer call() throws Exception { Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); LOGGER.info("HPO version {}", hpo.version().orElse("n/a")); System.out.println(hpo.version().orElse("n/a")); - + PromptGenerator generator = PromptGenerator.english(hpo); + PpktIndividual individual = new PpktIndividual(new File(ppkt)); + String prompt = generator.createPrompt(individual); + System.out.println(prompt); return 0; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java index 1e9d1f3..a8e2cea 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java @@ -3,7 +3,7 @@ import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenopacket2prompt.nejm.NejmCaseReportIngestor; +import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportIngestor; import org.monarchinitiative.phenopacket2prompt.querygen.*; import picocli.CommandLine; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConcept.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConcept.java similarity index 90% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConcept.java rename to 
src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConcept.java index a927fb8..6facd36 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConcept.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConcept.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.model; +package org.monarchinitiative.phenopacket2prompt.legacy; public record AdditionalConcept(AdditionalConceptType ctype,String text) implements AdditionalConceptI { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConceptI.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConceptI.java similarity index 95% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConceptI.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConceptI.java index c6bde66..084d42e 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConceptI.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConceptI.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.model; +package org.monarchinitiative.phenopacket2prompt.legacy; /** diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConceptType.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConceptType.java similarity index 95% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConceptType.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConceptType.java index c28e5d5..9c03569 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalConceptType.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConceptType.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.model; +package 
org.monarchinitiative.phenopacket2prompt.legacy; import org.monarchinitiative.phenol.base.PhenolRuntimeException; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalReplacementConceptType.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalReplacementConceptType.java similarity index 93% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalReplacementConceptType.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalReplacementConceptType.java index 4c44e1e..89f7bbb 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AdditionalReplacementConceptType.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalReplacementConceptType.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.model; +package org.monarchinitiative.phenopacket2prompt.legacy; public record AdditionalReplacementConceptType(AdditionalConceptType ctype, String text, diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/TimeSegment.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/TimeSegment.java similarity index 87% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/TimeSegment.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/TimeSegment.java index d472392..548bc2e 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/TimeSegment.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/TimeSegment.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.model; +package org.monarchinitiative.phenopacket2prompt.legacy; public class TimeSegment { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/Dehyphenizer.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/Dehyphenizer.java similarity index 96% rename from 
src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/Dehyphenizer.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/Dehyphenizer.java index 7de6b8b..90fd7df 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/Dehyphenizer.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/Dehyphenizer.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.nejm; +package org.monarchinitiative.phenopacket2prompt.legacy.nejm; import java.util.ArrayList; import java.util.Arrays; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportFromPdfFilterer.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportFromPdfFilterer.java similarity index 96% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportFromPdfFilterer.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportFromPdfFilterer.java index 6a6e6e1..692da1a 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportFromPdfFilterer.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportFromPdfFilterer.java @@ -1,10 +1,10 @@ -package org.monarchinitiative.phenopacket2prompt.nejm; +package org.monarchinitiative.phenopacket2prompt.legacy.nejm; import org.monarchinitiative.phenol.base.PhenolRuntimeException; -import org.monarchinitiative.phenopacket2prompt.model.AdditionalConcept; -import org.monarchinitiative.phenopacket2prompt.model.AdditionalConceptI; -import org.monarchinitiative.phenopacket2prompt.model.AdditionalReplacementConceptType; +import org.monarchinitiative.phenopacket2prompt.legacy.AdditionalConcept; +import org.monarchinitiative.phenopacket2prompt.legacy.AdditionalConceptI; +import org.monarchinitiative.phenopacket2prompt.legacy.AdditionalReplacementConceptType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff 
--git a/src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportImporter.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportImporter.java similarity index 98% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportImporter.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportImporter.java index 84dcadc..3d21682 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportImporter.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportImporter.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.nejm; +package org.monarchinitiative.phenopacket2prompt.legacy.nejm; import org.monarchinitiative.phenol.base.PhenolRuntimeException; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportIngestor.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportIngestor.java similarity index 98% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportIngestor.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportIngestor.java index 8bda331..2fa4911 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/nejm/NejmCaseReportIngestor.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/nejm/NejmCaseReportIngestor.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.nejm; +package org.monarchinitiative.phenopacket2prompt.legacy.nejm; import org.monarchinitiative.phenol.base.PhenolRuntimeException; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java new file mode 100644 index 0000000..7987ae1 --- /dev/null +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java @@ -0,0 +1,41 @@ +package org.monarchinitiative.phenopacket2prompt.model; + +public class AgeNotSpecified implements PhenopacketAge { + @Override + public String age() { + return ""; + } + + @Override + public PhenopacketAgeType ageType() { + return PhenopacketAgeType.NOT_SPECIFIED; + } + + @Override + public boolean isChild() { + return false; + } + + @Override + public boolean isInfant() { + return false; + } + + @Override + public boolean isFetus() { + return false; + } + + @Override + public boolean isCongenital() { + return false; + } + + @Override + public int totalDays() { + return 0; + } + + @Override + public boolean specified() {return false; } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java new file mode 100644 index 0000000..6c7cd41 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java @@ -0,0 +1,83 @@ +package org.monarchinitiative.phenopacket2prompt.model; + +import org.monarchinitiative.phenol.annotations.formats.hpo.HpoOnset; +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.util.Optional; +import java.util.Set; + +public class HpoOnsetAge implements PhenopacketAge { + + private final TermId tid; + private final String label; + + private final int totalDays; + + + /** One of Antenatal onset HP:0030674; Fetal onset HP:0011461; Late first trimester onset HP:0034199; + * Third trimester onset HP:0034197; Second trimester onset HP:0034198; Embryonal onset HP:0011460 */ + private final static Set fetalIds = Set.of(TermId.of("HP:0030674"), TermId.of("HP:0011461"), TermId.of("HP:0034199"), + TermId.of("HP:0034197"), TermId.of("HP:0034198"), TermId.of("HP:0011460")); + + /** Childhood onset */ + private final static TermId childhoodOnset = TermId.of("HP:0011463"); + + + /** Infantile
onset */ + private final static TermId infantileOnset = TermId.of("HP:0003593"); + + /** Congenital onset */ + private final static TermId congenitalOnset = TermId.of("HP:0003577"); + + + + public HpoOnsetAge(String id, String label) { + this.tid = TermId.of(id); + this.label = label; + Optional opt = HpoOnset.fromTermId(tid); + if (opt.isPresent()) { + HpoOnset onset = opt.get(); + totalDays = (int) ( onset.start().days() /2+ onset.end().days()/2); + } else { + totalDays = Integer.MAX_VALUE; + } + + } + + @Override + public String age() { + return label; + } + + @Override + public PhenopacketAgeType ageType() { + return PhenopacketAgeType.HPO_ONSET_AGE_TYPE; + } + + + @Override + public boolean isChild() { + return tid.equals(childhoodOnset); + } + + @Override + public boolean isInfant() { + return tid.equals(infantileOnset); + } + + @Override + public boolean isCongenital() { + return tid.equals(congenitalOnset); + } + + @Override + public boolean isFetus() { + return fetalIds.contains(tid); + } + + + @Override + public int totalDays() { + return totalDays; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoPhenotypicFeaturesAtAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoPhenotypicFeaturesAtAge.java new file mode 100644 index 0000000..d1ab129 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoPhenotypicFeaturesAtAge.java @@ -0,0 +1,4 @@ +package org.monarchinitiative.phenopacket2prompt.model; + +public class HpoPhenotypicFeaturesAtAge { +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java new file mode 100644 index 0000000..0c51307 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java @@ -0,0 +1,101 @@ +package org.monarchinitiative.phenopacket2prompt.model; + +import 
org.monarchinitiative.phenol.base.PhenolRuntimeException; + +public class Iso8601Age implements PhenopacketAge { + + private final String iso8601; + + private final int years; + private final int months; + private final int days; + + private final int totalDays; + + public Iso8601Age(String iso) { + iso8601 = iso; + String ageString = iso; + if (! ageString.startsWith("P")) { + throw new PhenolRuntimeException("Malformed iso8601 age \"" + iso + "\""); + } + ageString = ageString.substring(1); + int i = ageString.indexOf("Y"); + if (i>0) { + years = Integer.parseInt(ageString.substring(0,i)); + ageString = ageString.substring(i+1); + } else { + years = 0; + } + i = ageString.indexOf("M"); + if (i>0) { + months = Integer.parseInt(ageString.substring(0,i)); + ageString = ageString.substring(i+1); + } else { + months = 0; + } + i = ageString.indexOf("D"); + if (i>0) { + days = Integer.parseInt(ageString.substring(0,i)); + ageString = ageString.substring(i+1); + } else { + days = 0; + } + totalDays = (int) ( days + 30.437*months + 365.25*years); + } + + public int getYears() { + return years; + } + + public int getMonths() { + return months; + } + + public int getDays() { + return days; + } + + @Override + public String age() { + StringBuilder sb = new StringBuilder(); + if (years > 0) { + return String.format("%d year-old", years); + } else if (months > 0) { + return String.format("%d month-old", months); + } else { + return String.format("%d day-old", days); + } + } + + @Override + public PhenopacketAgeType ageType() { + return PhenopacketAgeType.ISO8601_AGE_TYPE; + } + + @Override + public boolean isChild() { + return years >= 1 && years < 10; + } + + @Override + public boolean isInfant() { + return years < 1; + } + + @Override + public boolean isFetus() { + // always false because we cannot express prenatal ages with iso + return false; + } + + @Override + public boolean isCongenital() { + // rarely. 
Usually we use Hpo Onset for congenital + return years == 0 && months == 0 && days == 0; + } + + @Override + public int totalDays() { + return totalDays; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/OntologyTerm.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/OntologyTerm.java similarity index 89% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/OntologyTerm.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/model/OntologyTerm.java index 16ffb8c..7b51a4e 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/OntologyTerm.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/OntologyTerm.java @@ -1,7 +1,6 @@ -package org.monarchinitiative.phenopacket2prompt.model.ppkt; +package org.monarchinitiative.phenopacket2prompt.model; import org.monarchinitiative.phenol.ontology.data.TermId; -import org.phenopackets.schema.v2.core.Disease; import java.util.Optional; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java new file mode 100644 index 0000000..a08a1d1 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java @@ -0,0 +1,19 @@ +package org.monarchinitiative.phenopacket2prompt.model; + +public interface PhenopacketAge { + + String age(); + PhenopacketAgeType ageType(); + + boolean isChild(); + + boolean isInfant(); + + boolean isFetus(); + + boolean isCongenital(); + + int totalDays(); + + default boolean specified() {return true; } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAgeType.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAgeType.java new file mode 100644 index 0000000..0151a3f --- /dev/null +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAgeType.java @@ -0,0 +1,6 @@ +package org.monarchinitiative.phenopacket2prompt.model; + +public enum PhenopacketAgeType { + + ISO8601_AGE_TYPE, HPO_ONSET_AGE_TYPE, NOT_SPECIFIED; +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketDisease.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketDisease.java similarity index 87% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketDisease.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketDisease.java index cdd1fb0..bb6ead6 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketDisease.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketDisease.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.model.ppkt; +package org.monarchinitiative.phenopacket2prompt.model; import org.monarchinitiative.phenol.ontology.data.TermId; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketSex.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketSex.java similarity index 53% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketSex.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketSex.java index ad2c343..a31fd27 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketSex.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketSex.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.model.ppkt; +package org.monarchinitiative.phenopacket2prompt.model; public enum PhenopacketSex { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java similarity index 81% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java index d7b66f7..6857244 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PpktIndividual.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.model.ppkt; +package org.monarchinitiative.phenopacket2prompt.model; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.ontology.data.TermId; @@ -14,9 +14,7 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; +import java.util.*; public class PpktIndividual { Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); @@ -98,9 +96,16 @@ public List getDiseases() { return diseases; } - public List getPhenotypicFeatures() { - List termList = new ArrayList<>(); + public Map> getPhenotypicFeatures() { + Map> ageToFeatureMap = new HashMap<>(); + PhenopacketAge notSpecified = new AgeNotSpecified(); + ageToFeatureMap.put(notSpecified, new ArrayList<>()); for (var pf : ppkt.getPhenotypicFeaturesList()) { + OntologyClass clz = pf.getType(); + if (clz.getId().isEmpty()) { + System.err.println("Warning, empty ontology term"); + continue; + } TermId hpoId = TermId.of(pf.getType().getId()); String label = pf.getType().getLabel(); boolean excluded = pf.getExcluded(); @@ -110,11 +115,12 @@ public List getPhenotypicFeatures() { opt = getAgeFromTimeElement(telem); } if (opt.isPresent()) { - termList.add(new OntologyTerm(hpoId, label, excluded, opt.get())); + ageToFeatureMap.putIfAbsent(opt.get(), new ArrayList<>()); + ageToFeatureMap.get(opt.get()).add(new 
OntologyTerm(hpoId, label, excluded, opt.get())); } else { - termList.add(new OntologyTerm(hpoId, label, excluded)); + ageToFeatureMap.get(notSpecified).add(new OntologyTerm(hpoId, label, excluded)); } } - return termList; + return ageToFeatureMap; } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/HpoOnsetAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/HpoOnsetAge.java deleted file mode 100644 index a448da8..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/HpoOnsetAge.java +++ /dev/null @@ -1,25 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.model.ppkt; - -import org.monarchinitiative.phenol.ontology.data.TermId; - -public class HpoOnsetAge implements PhenopacketAge { - - private final TermId tid; - private final String label; - - - public HpoOnsetAge(String id, String label) { - this.tid = TermId.of(id); - this.label = label; - } - - @Override - public String age() { - return label; - } - - @Override - public PhenopacketAgeType ageType() { - return PhenopacketAgeType.HPO_ONSET_AGE_TYPE; - } -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/Iso8601Age.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/Iso8601Age.java deleted file mode 100644 index 7b8937b..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/Iso8601Age.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.model.ppkt; - -public class Iso8601Age implements PhenopacketAge { - - private final String iso8601; - - public Iso8601Age(String iso) { - iso8601 = iso; - } - - - - @Override - public String age() { - return iso8601; - } - - @Override - public PhenopacketAgeType ageType() { - return PhenopacketAgeType.ISO8601_AGE_TYPE; - } -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAge.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAge.java deleted file mode 100644 index 6e245e9..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAge.java +++ /dev/null @@ -1,10 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.model.ppkt; - -public interface PhenopacketAge { - - String age(); - PhenopacketAgeType ageType(); - - - -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAgeType.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAgeType.java deleted file mode 100644 index bee006c..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/ppkt/PhenopacketAgeType.java +++ /dev/null @@ -1,6 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.model.ppkt; - -public enum PhenopacketAgeType { - - ISO8601_AGE_TYPE, HPO_ONSET_AGE_TYPE; -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeGenerator.java new file mode 100644 index 0000000..1a01c97 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeGenerator.java @@ -0,0 +1,12 @@ +package org.monarchinitiative.phenopacket2prompt.output; + +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; + +public interface PhenopacketAgeGenerator { + + String age(PhenopacketAge ppktAge); + + String atAge(PhenopacketAge ppktAge); + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketSexGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketSexGenerator.java new file mode 100644 index 0000000..0662d70 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketSexGenerator.java @@ -0,0 +1,11 @@ +package org.monarchinitiative.phenopacket2prompt.output; + 
+import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; + +public interface PhenopacketSexGenerator { + + + String ppktSex(PpktIndividual individual); + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java new file mode 100644 index 0000000..b850365 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java @@ -0,0 +1,35 @@ +package org.monarchinitiative.phenopacket2prompt.output; + +import java.util.List; + +public interface PhenopacketTextGenerator { + + + + String QUERY_HEADER(); + + + + /** + * @param items a list of HPO labels, e.g., X and Y and Z + * @return A string formatted as X, Y, and Z. + */ + default String getOxfordCommaList(List items, String andWord) { + if (items.size() == 2) { + // no comma if we just have two items. + // one item will work with the below code + String andWithSpace = String.format(" %s ", andWord); + return String.join(andWithSpace, items) + "."; + } + StringBuilder sb = new StringBuilder(); + String symList = String.join(", ", items); + int jj = symList.lastIndexOf(", "); + if (jj > 0) { + String andWithSpaceAndComma = String.format(", %s ", andWord); + symList = symList.substring(0, jj) + andWithSpaceAndComma + symList.substring(jj+2); + } + sb.append(symList).append("."); + return sb.toString(); + } + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java new file mode 100644 index 0000000..dea0895 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java @@ -0,0 +1,44 @@ +package org.monarchinitiative.phenopacket2prompt.output; + +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; 
+import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; + +import java.util.List; +import java.util.function.Predicate; + +public interface PpktPhenotypicFeatureGenerator { + + + String featureList( List ontologyTerms); + + String excludedFeatureList( List ontologyTerms); + + + default boolean hasObservedFeatures( List ontologyTerms) { + return ontologyTerms.stream().anyMatch(Predicate.not(OntologyTerm::isExcluded)); + } + + default boolean hasExcludedFeatures( List ontologyTerms) { + return ontologyTerms.stream().anyMatch(OntologyTerm::isExcluded); + } + + default String getOxfordCommaList(List items, String andWord) { + if (items.size() == 2) { + // no comma if we just have two items. + // one item will work with the below code + String andWithSpace = String.format(" %s ", andWord); + return String.join(andWithSpace, items) + "."; + } + StringBuilder sb = new StringBuilder(); + String symList = String.join(", ", items); + int jj = symList.lastIndexOf(", "); + if (jj > 0) { + String andWithSpaceAndComma = String.format(", %s ", andWord); + symList = symList.substring(0, jj) + andWithSpaceAndComma + symList.substring(jj+2); + } + sb.append(symList); + return sb.toString(); + } + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java new file mode 100644 index 0000000..4fb5cc7 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -0,0 +1,40 @@ +package org.monarchinitiative.phenopacket2prompt.output; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.impl.EnglishPromptGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.english.PhenopacketSexEnglish; +import 
org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktAgeEnglish; +import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktPhenotypicfeatureEnglish; +import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktTextEnglish; + +public interface PromptGenerator { + + + String queryHeader(); + String getIndividualInformation(PpktIndividual ppktIndividual); + String getPhenotypicFeatures(PpktIndividual ppktIndividual); + + + + public static PromptGenerator english(Ontology ontology){ + PhenopacketSexGenerator sgen = new PhenopacketSexEnglish(); + PhenopacketAgeGenerator page = new PpktAgeEnglish(); + PhenopacketTextGenerator ptext = new PpktTextEnglish(); + PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureEnglish(); + return new EnglishPromptGenerator(ontology, sgen, page, ptext, pfgen); + } + + + + default String createPrompt(PpktIndividual individual) { + StringBuilder sb = new StringBuilder(); + sb.append(queryHeader()); + sb.append(getIndividualInformation(individual)); + sb.append(getPhenotypicFeatures(individual)); + + return sb.toString(); + + } + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/EnglishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/EnglishPromptGenerator.java new file mode 100644 index 0000000..9df0cba --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/EnglishPromptGenerator.java @@ -0,0 +1,107 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.*; + +import java.util.*; 
+import java.util.stream.Collectors; + +public class EnglishPromptGenerator implements PromptGenerator { + + private final Ontology hpo; + + private final PhenopacketSexGenerator sexGenerator; + + private final PhenopacketAgeGenerator ppktAgeGenerator; + + private final PhenopacketTextGenerator ppktTextGenerator; + + private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; + + + + public EnglishPromptGenerator(Ontology hpo, PhenopacketSexGenerator sgen, PhenopacketAgeGenerator page, PhenopacketTextGenerator ptext, PpktPhenotypicFeatureGenerator pfgen){ + this.hpo = hpo; + sexGenerator = sgen; + ppktAgeGenerator = page; + ppktTextGenerator = ptext; + this.ppktPhenotypicFeatureGenerator = pfgen; + } + + + + + @Override + public String getIndividualInformation(PpktIndividual ppktIndividual) { + StringBuilder sb = new StringBuilder(); + String sex = sexGenerator.ppktSex(ppktIndividual); + Optional lastAgeOpt = ppktIndividual.getAgeAtLastExamination(); + Optional onsetOpt = ppktIndividual.getAgeAtOnset(); + if (lastAgeOpt.isPresent()) { + PhenopacketAge lastExamAge = lastAgeOpt.get(); + String examAge = ppktAgeGenerator.age(lastExamAge); + sb.append("The proband was a ").append(examAge).append( " ").append(sex).append(". "); + } else { + sb.append("The proband was a ").append(sex).append(". "); + } + if (onsetOpt.isPresent()) { + PhenopacketAge onsetAge = onsetOpt.get(); + String onset = ppktAgeGenerator.age(onsetAge); + sb.append("Initial manifestations of disease appeared when the proband was ").append(onset).append(". "); + } + return sb.toString(); + } + + @Override + public String getPhenotypicFeatures(PpktIndividual ppktIndividual) { + StringBuilder sb = new StringBuilder(); + Map> termMap = ppktIndividual.getPhenotypicFeatures(); + List ageList = new ArrayList<>(termMap.keySet()); + Collections.sort(ageList,(a, b) -> Integer.compare(a.totalDays(), b.totalDays())); + for (var age: ageList) { + List terms = termMap.get(age); + if (! 
age.specified()) { + if (termMap.size() > 1) { + // if size is greater than one, there was at least one specified time point + if (ppktPhenotypicFeatureGenerator.hasObservedFeatures(terms)) { + sb.append("Additional features included ").append(ppktPhenotypicFeatureGenerator.featureList(terms)).append(". "); + } + if (ppktPhenotypicFeatureGenerator.hasExcludedFeatures(terms)) { + sb.append("Additional excluded features were ").append(ppktPhenotypicFeatureGenerator.excludedFeatureList(terms)).append(". "); + } + } else { + if (ppktPhenotypicFeatureGenerator.hasObservedFeatures(terms)) { + sb.append("The following clinical manifestations were observed: ").append(ppktPhenotypicFeatureGenerator.featureList(terms)).append(". "); + } + if (ppktPhenotypicFeatureGenerator.hasExcludedFeatures(terms)) { + sb.append("The following clinical manifestations were excluded: ").append(ppktPhenotypicFeatureGenerator.excludedFeatureList(terms)).append(". "); + } + } + } else { + String ageString = ppktAgeGenerator.age(age); + + if (ppktPhenotypicFeatureGenerator.hasObservedFeatures(terms)) { + sb.append(ageString).append(", the following clinical manifestations were observed: ").append(ppktPhenotypicFeatureGenerator.featureList(terms)).append(". "); + } + if (ppktPhenotypicFeatureGenerator.hasExcludedFeatures(terms)) { + sb.append(ageString).append(", the following clinical manifestations were excluded: ").append(ppktPhenotypicFeatureGenerator.excludedFeatureList(terms)).append(". 
"); + } + } + } + + return sb.toString(); + } + + + + + + @Override + public String queryHeader() { + return ppktTextGenerator.QUERY_HEADER(); + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PhenopacketSexEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PhenopacketSexEnglish.java new file mode 100644 index 0000000..523b3f5 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PhenopacketSexEnglish.java @@ -0,0 +1,58 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketSexGenerator; + +import java.util.Optional; + +public class PhenopacketSexEnglish implements PhenopacketSexGenerator { + @Override + public String ppktSex(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "female"; + case MALE -> "male"; + default -> "individual"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "girl"; + case MALE -> "boy"; + default -> "child"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "female newborn"; + case MALE -> "male newborn"; + default -> "newborn"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "female fetus"; + case MALE -> "male fetus"; + default -> "fetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "female infant"; + case MALE -> "male infant"; + default -> "infant"; 
+ }; + } else { + return switch (psex) { + case FEMALE -> "woman"; + case MALE -> "man"; + default -> "individual"; + }; + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeEnglish.java new file mode 100644 index 0000000..a3bd78c --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeEnglish.java @@ -0,0 +1,45 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAgeType; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketAgeGenerator; + +public class PpktAgeEnglish implements PhenopacketAgeGenerator { + @Override + public String age(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return ppktAge.age() + " old"; + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "infant"; + case "Childhood onset" -> "child"; + case "Neonatal onset" -> "neonate"; + case "Congenital onset" -> "born"; + case "Adult onset" -> "adult"; + default-> String.format("During the %s", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return "At an age of " + ppktAge.age(); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "During the infantile period"; + case "Childhood onset" -> "During childhood"; + case 
"Neonatal onset" -> "During the neonatal period"; + case "Congenital onset" -> "At birth"; + case "Adult onset" -> "As an adult"; + default-> String.format("During the %s", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java new file mode 100644 index 0000000..b2ef31a --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java @@ -0,0 +1,26 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.List; +import java.util.function.Predicate; + +public class PpktPhenotypicfeatureEnglish implements PpktPhenotypicFeatureGenerator { + @Override + public String featureList(List ontologyTerms) { + List labels = ontologyTerms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .map(OntologyTerm::getLabel).toList(); + return getOxfordCommaList(labels, "and"); + } + + @Override + public String excludedFeatureList(List ontologyTerms) { + List labels = ontologyTerms.stream() + .filter(OntologyTerm::isExcluded) + .map(OntologyTerm::getLabel).toList(); + return getOxfordCommaList(labels, "and"); + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java new file mode 100644 index 0000000..cd63858 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java @@ -0,0 
+1,37 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; + +public class PpktTextEnglish implements PhenopacketTextGenerator { + @Override + public String QUERY_HEADER() { + return """ +I am running an experiment on a clinicopathological case conference to see how your diagnoses +compare with those of human experts. I am going to give you part of a medical case. These have +all been published in the New England Journal of Medicine. You are not trying to treat any patients. +As you read the case, you will notice that there are expert discussants giving their thoughts. +In this case, you are “Dr. GPT-4,” an Al language model who is discussing the case along with +human experts. A clinicopathological case conference has several unspoken rules. The first is +that there is most often a single definitive diagnosis (though rarely there may be more than one), +and it is a diagnosis that is known today to exist in humans. The diagnosis is almost always +confirmed by some sort of clinical pathology test or anatomic pathology test, though in +rare cases when such a test does not exist for a diagnosis the diagnosis can instead be +made using validated clinical criteria or very rarely just confirmed by expert opinion. +You will be told at the end of the case description whether a diagnostic test/tests are +being ordered, which you can assume will make the diagnosis/diagnoses. After you read the case, +I want you to give two pieces of information. The first piece of information is your most likely +diagnosis/diagnoses. You need to be as specific as possible -- the goal is to get the correct +answer, not a broad category of answers. You do not need to explain your reasoning, just give +the diagnosis/diagnoses. 
The second piece of information is to give a robust differential diagnosis, +ranked by their probability so that the most likely diagnosis is at the top, and the least likely +is at the bottom. There is no limit to the number of diagnoses on your differential. You can give +as many diagnoses as you think are reasonable. You do not need to explain your reasoning, +just list the diagnoses. Again, the goal is to be as specific as possible with each of the +diagnoses. +Do you have any questions, Dr. GPT-4? + +Here is the case: + +"""; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenopacketFactoryIngestor.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenopacketFactoryIngestor.java index 2319d72..0d3cc8c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenopacketFactoryIngestor.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenopacketFactoryIngestor.java @@ -2,7 +2,7 @@ import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenopacket2prompt.nejm.NejmCaseReportFromPdfFilterer; +import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryPromptFactory.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryPromptFactory.java index 06e5915..6c90649 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryPromptFactory.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryPromptFactory.java @@ -3,7 +3,7 @@ import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.ontology.data.Ontology; -import 
org.monarchinitiative.phenopacket2prompt.nejm.NejmCaseReportFromPdfFilterer; +import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; import org.monarchinitiative.phenopacket2prompt.querygen.qfactory.QcQueryGenerator; import org.monarchinitiative.phenopacket2prompt.querygen.qfactory.TextWithManualAnnotsGenerator; import org.monarchinitiative.phenopacket2prompt.querygen.qfactory.TextWithoutDiscussionQuery; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/AbstractQueryGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/AbstractQueryGenerator.java index 16cf232..ea3407a 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/AbstractQueryGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/AbstractQueryGenerator.java @@ -8,8 +8,8 @@ import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenol.ontology.data.TermId; -import org.monarchinitiative.phenopacket2prompt.model.TimeSegment; -import org.monarchinitiative.phenopacket2prompt.nejm.NejmCaseReportFromPdfFilterer; +import org.monarchinitiative.phenopacket2prompt.legacy.TimeSegment; +import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; import org.monarchinitiative.phenopacket2prompt.querygen.PhenotypicFeatureFilter; import org.monarchinitiative.phenopacket2prompt.querygen.TimePoint; import org.monarchinitiative.phenopacket2prompt.querygen.TimePointParser; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/PhenopacketOnlyQuery.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/PhenopacketOnlyQuery.java index 99c2faa..7e24994 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/PhenopacketOnlyQuery.java +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/PhenopacketOnlyQuery.java @@ -2,7 +2,7 @@ import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenopacket2prompt.nejm.NejmCaseReportFromPdfFilterer; +import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; public class PhenopacketOnlyQuery extends AbstractQueryGenerator { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/QcQueryGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/QcQueryGenerator.java index 75edc79..c1591e0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/QcQueryGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/QcQueryGenerator.java @@ -2,7 +2,7 @@ import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenopacket2prompt.nejm.NejmCaseReportFromPdfFilterer; +import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; import java.util.List; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithManualAnnotsGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithManualAnnotsGenerator.java index 12de701..8eea8cc 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithManualAnnotsGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithManualAnnotsGenerator.java @@ -3,9 +3,9 @@ import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.ontology.data.Ontology; -import 
org.monarchinitiative.phenopacket2prompt.model.AdditionalConceptI; -import org.monarchinitiative.phenopacket2prompt.model.AdditionalConceptType; -import org.monarchinitiative.phenopacket2prompt.nejm.NejmCaseReportFromPdfFilterer; +import org.monarchinitiative.phenopacket2prompt.legacy.AdditionalConceptI; +import org.monarchinitiative.phenopacket2prompt.legacy.AdditionalConceptType; +import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; import org.monarchinitiative.phenopacket2prompt.querygen.TimePoint; import org.monarchinitiative.phenopacket2prompt.querygen.TimePointParser; import org.phenopackets.schema.v2.core.OntologyClass; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithoutDiscussionQuery.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithoutDiscussionQuery.java index de4cf3c..2eb967b 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithoutDiscussionQuery.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithoutDiscussionQuery.java @@ -2,7 +2,7 @@ import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenopacket2prompt.nejm.NejmCaseReportFromPdfFilterer; +import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; import java.util.List; diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/nejm/DehyphenizerTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/nejm/DehyphenizerTest.java index a9f13c0..0f2bff2 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/nejm/DehyphenizerTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/nejm/DehyphenizerTest.java @@ -2,6 +2,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import 
org.monarchinitiative.phenopacket2prompt.legacy.nejm.Dehyphenizer; import java.util.ArrayList; import java.util.List; From 23a4ccf59b498b99b9c2b9c42083119ab25634bd Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 24 Apr 2024 11:10:06 +0200 Subject: [PATCH 4/6] parse international --- pom.xml | 2 +- .../cmd/DownloadCommand.java | 6 +- .../cmd/OntoGptCommand.java | 6 +- .../international/HpInternational.java | 29 +++ .../HpInternationalOboParser.java | 104 ++++++++ .../querygen/PhenopacketFactoryIngestor.java | 2 +- .../querygen/PhenotypicFeatureFilter.java | 2 +- .../querygen/QueryOutputGenerator.java | 2 +- .../querygen/QueryOutputType.java | 2 +- .../querygen/QueryPromptFactory.java | 10 +- .../{ => legacy}/querygen/TimePoint.java | 2 +- .../querygen/TimePointParser.java | 2 +- .../qfactory/AbstractQueryGenerator.java | 8 +- .../qfactory/PhenopacketOnlyQuery.java | 2 +- .../querygen/qfactory/QcQueryGenerator.java | 2 +- .../TextWithManualAnnotsGenerator.java | 6 +- .../qfactory/TextWithoutDiscussionQuery.java | 2 +- .../HpInternationalOboParserTest.java | 32 +++ .../model/PpktIndividualTest.java | 17 +- .../querygen/TimePointParserTest.java | 2 + .../resources/data/small-hp-international.obo | 224 ++++++++++++++++++ 21 files changed, 434 insertions(+), 30 deletions(-) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternational.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/PhenopacketFactoryIngestor.java (97%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/PhenotypicFeatureFilter.java (98%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/QueryOutputGenerator.java (96%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/QueryOutputType.java (87%) rename 
src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/QueryPromptFactory.java (86%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/TimePoint.java (82%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/TimePointParser.java (98%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/qfactory/AbstractQueryGenerator.java (98%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/qfactory/PhenopacketOnlyQuery.java (91%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/qfactory/QcQueryGenerator.java (92%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/qfactory/TextWithManualAnnotsGenerator.java (97%) rename src/main/java/org/monarchinitiative/phenopacket2prompt/{ => legacy}/querygen/qfactory/TextWithoutDiscussionQuery.java (90%) create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParserTest.java create mode 100644 src/test/resources/data/small-hp-international.obo diff --git a/pom.xml b/pom.xml index bd1f0bf..01800b4 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.monarchinitiative phenopacket2prompt - 0.3.8 + 0.3.10 phenopacket2prompt https://github.com/monarch-initiative/phenopacket2prompt diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/DownloadCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/DownloadCommand.java index 6117214..612756f 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/DownloadCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/DownloadCommand.java @@ -9,6 +9,8 @@ import picocli.CommandLine; import java.io.File; +import java.net.MalformedURLException; +import java.net.URL; import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; @@ -33,11 +35,13 @@ 
public class DownloadCommand implements Callable{ public boolean overwrite; @Override - public Integer call() throws FileDownloadException { + public Integer call() throws FileDownloadException, MalformedURLException { logger.info(String.format("Download analysis to %s", datadir)); Path destination = Paths.get(datadir); BioDownloaderBuilder builder = BioDownloader.builder(destination); builder.hpoJson(); + URL hpoInternational = new URL("https://github.com/obophenotype/human-phenotype-ontology/releases/latest/download/hp-international.obo"); + builder.custom("hp-international.obo", hpoInternational); BioDownloader downloader = builder.build(); List files = downloader.download(); return 0; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java index a8e2cea..8e6c075 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java @@ -4,6 +4,10 @@ import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportIngestor; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.PhenopacketFactoryIngestor; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.QueryOutputGenerator; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.QueryOutputType; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.QueryPromptFactory; import org.monarchinitiative.phenopacket2prompt.querygen.*; import picocli.CommandLine; @@ -11,7 +15,7 @@ import java.util.*; import java.util.concurrent.Callable; -import static org.monarchinitiative.phenopacket2prompt.querygen.QueryOutputType.*; +import static org.monarchinitiative.phenopacket2prompt.legacy.querygen.QueryOutputType.*; @CommandLine.Command(name 
= "gpt-time", aliases = {"G"}, diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternational.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternational.java new file mode 100644 index 0000000..2823dc7 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternational.java @@ -0,0 +1,29 @@ +package org.monarchinitiative.phenopacket2prompt.international; + +import org.monarchinitiative.phenol.ontology.data.TermId; +import java.util.Map; +import java.util.HashMap; + +public class HpInternational { + + private final String languageAcronym; + + private final Map termIdToLabelMap; + + public HpInternational(String language) { + languageAcronym = language; + termIdToLabelMap = new HashMap<>(); + } + + public void addTerm(TermId tid, String label) { + this.termIdToLabelMap.put(tid, label); + } + + public String getLanguageAcronym() { + return languageAcronym; + } + + public Map getTermIdToLabelMap() { + return termIdToLabelMap; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java new file mode 100644 index 0000000..815b85e --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java @@ -0,0 +1,104 @@ +package org.monarchinitiative.phenopacket2prompt.international; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class HpInternationalOboParser { + + + private final Map languageToInternationalMap; + + private static final String ENGLISH = "en"; + + + 
/** + * Extract language acronym + * @param annots a String such as source:value="Split hand", translation:status="OFFICIAL", source:language="en", translation:language="tr"} + * @return in this case "tr" + */ + public Optional getLanguage(String annots) { + final String translation = "translation:language=\"(\\w{2,2})\""; + final Pattern pattern = Pattern.compile(translation); + Matcher matcher = pattern.matcher(annots); + if (matcher.find()) { + String language = matcher.group(1); + return Optional.of(language); + } else { + return Optional.empty(); + } + } + + public HpInternationalOboParser(File file) { + languageToInternationalMap = new HashMap<>(); + String pattern = "id: (HP:\\d{7,7})"; + Set acronyms = Set.of("cs", "en", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh"); + for (String acronym : acronyms) { + languageToInternationalMap.put(acronym, new HpInternational(acronym)); + } + // Create a Pattern object + Pattern idLinePattern = Pattern.compile(pattern); + try (BufferedReader br = new BufferedReader(new FileReader(file))) { + String line; + boolean inHpTerm = false; + TermId currentHpoTermId = null; + while ((line = br.readLine()) != null) { + Matcher matcher = idLinePattern.matcher(line); + if (matcher.find()) { + currentHpoTermId = TermId.of(matcher.group(1)); + inHpTerm = true; + System.out.println(currentHpoTermId.getValue()); + } else if (inHpTerm) { + if (line.isEmpty()) { + inHpTerm = false; + } else if (line.startsWith("name:")) { + line = line.substring(5); + String [] fields = line.split("\\{"); + if (fields.length == 1) { + System.out.println("English! " + fields[0]); + String hpoLabel = fields[0].trim(); + languageToInternationalMap.get(ENGLISH).addTerm(currentHpoTermId, hpoLabel); + } else if (fields.length == 2) { + System.out.println("OTHER! 
" + fields[0] + "--" + fields[1]); + String hpoLabel = fields[0].trim(); + String annots = fields[1]; + Optional opt = getLanguage(annots); + if (opt.isPresent()) { + String language = opt.get(); + System.out.println(language); + languageToInternationalMap.get(language).addTerm(currentHpoTermId, hpoLabel); + } else { + System.err.printf("[ERROR] Could not extract language for %s.", line); + } + } + + } + + } + // System.out.println(line); + } + } catch (IOException e) { + e.printStackTrace(); + } + for (String language : languageToInternationalMap.keySet()) { + System.out.println(language); + HpInternational international = languageToInternationalMap.get(language); + for (var entry : international.getTermIdToLabelMap().entrySet()) { + System.out.printf("\t%s: %s\n", entry.getKey().getValue(), entry.getValue()); + } + } + } + + public Map getLanguageToInternationalMap() { + return languageToInternationalMap; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenopacketFactoryIngestor.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/PhenopacketFactoryIngestor.java similarity index 97% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenopacketFactoryIngestor.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/PhenopacketFactoryIngestor.java index 0d3cc8c..80fc6b2 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenopacketFactoryIngestor.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/PhenopacketFactoryIngestor.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen; import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.ontology.data.Ontology; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenotypicFeatureFilter.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/PhenotypicFeatureFilter.java similarity index 98% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenotypicFeatureFilter.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/PhenotypicFeatureFilter.java index 71fe6ba..eb2fece 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/PhenotypicFeatureFilter.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/PhenotypicFeatureFilter.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen; import org.monarchinitiative.phenol.ontology.data.Ontology; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryOutputGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryOutputGenerator.java similarity index 96% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryOutputGenerator.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryOutputGenerator.java index 0b17252..e975ce7 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryOutputGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryOutputGenerator.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen; import org.monarchinitiative.phenol.base.PhenolRuntimeException; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryOutputType.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryOutputType.java similarity index 87% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryOutputType.java rename to 
src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryOutputType.java index f1b280f..563dd64 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryOutputType.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryOutputType.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen; public enum QueryOutputType { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryPromptFactory.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryPromptFactory.java similarity index 86% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryPromptFactory.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryPromptFactory.java index 6c90649..7d3dc75 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/QueryPromptFactory.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/QueryPromptFactory.java @@ -1,13 +1,13 @@ -package org.monarchinitiative.phenopacket2prompt.querygen; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen; import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; -import org.monarchinitiative.phenopacket2prompt.querygen.qfactory.QcQueryGenerator; -import org.monarchinitiative.phenopacket2prompt.querygen.qfactory.TextWithManualAnnotsGenerator; -import org.monarchinitiative.phenopacket2prompt.querygen.qfactory.TextWithoutDiscussionQuery; -import org.monarchinitiative.phenopacket2prompt.querygen.qfactory.PhenopacketOnlyQuery; +import 
org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory.PhenopacketOnlyQuery; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory.QcQueryGenerator; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory.TextWithManualAnnotsGenerator; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory.TextWithoutDiscussionQuery; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePoint.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/TimePoint.java similarity index 82% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePoint.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/TimePoint.java index 9a4e72a..5436a18 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePoint.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/TimePoint.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen; public record TimePoint(String point, int start, int end) implements Comparable{ diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePointParser.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/TimePointParser.java similarity index 98% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePointParser.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/TimePointParser.java index 6974bda..2d22d3b 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePointParser.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/TimePointParser.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen; +package 
org.monarchinitiative.phenopacket2prompt.legacy.querygen; import java.util.*; import java.util.regex.Matcher; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/AbstractQueryGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/AbstractQueryGenerator.java similarity index 98% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/AbstractQueryGenerator.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/AbstractQueryGenerator.java index ea3407a..990b9f9 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/AbstractQueryGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/AbstractQueryGenerator.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen.qfactory; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; @@ -10,9 +10,9 @@ import org.monarchinitiative.phenol.ontology.data.TermId; import org.monarchinitiative.phenopacket2prompt.legacy.TimeSegment; import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; -import org.monarchinitiative.phenopacket2prompt.querygen.PhenotypicFeatureFilter; -import org.monarchinitiative.phenopacket2prompt.querygen.TimePoint; -import org.monarchinitiative.phenopacket2prompt.querygen.TimePointParser; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.PhenotypicFeatureFilter; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.TimePoint; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.TimePointParser; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.IndividualBuilder; import 
org.phenopackets.phenopackettools.builder.builders.MetaDataBuilder; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/PhenopacketOnlyQuery.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/PhenopacketOnlyQuery.java similarity index 91% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/PhenopacketOnlyQuery.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/PhenopacketOnlyQuery.java index 7e24994..ce9cb49 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/PhenopacketOnlyQuery.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/PhenopacketOnlyQuery.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen.qfactory; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory; import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.ontology.data.Ontology; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/QcQueryGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/QcQueryGenerator.java similarity index 92% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/QcQueryGenerator.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/QcQueryGenerator.java index c1591e0..a57e508 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/QcQueryGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/QcQueryGenerator.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen.qfactory; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory; import org.monarchinitiative.fenominal.core.TermMiner; import 
org.monarchinitiative.phenol.ontology.data.Ontology; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithManualAnnotsGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/TextWithManualAnnotsGenerator.java similarity index 97% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithManualAnnotsGenerator.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/TextWithManualAnnotsGenerator.java index 8eea8cc..a6ab8e6 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithManualAnnotsGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/TextWithManualAnnotsGenerator.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen.qfactory; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory; import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.base.PhenolRuntimeException; @@ -6,8 +6,8 @@ import org.monarchinitiative.phenopacket2prompt.legacy.AdditionalConceptI; import org.monarchinitiative.phenopacket2prompt.legacy.AdditionalConceptType; import org.monarchinitiative.phenopacket2prompt.legacy.nejm.NejmCaseReportFromPdfFilterer; -import org.monarchinitiative.phenopacket2prompt.querygen.TimePoint; -import org.monarchinitiative.phenopacket2prompt.querygen.TimePointParser; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.TimePoint; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.TimePointParser; import org.phenopackets.schema.v2.core.OntologyClass; import org.phenopackets.schema.v2.core.PhenotypicFeature; import org.slf4j.Logger; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithoutDiscussionQuery.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/TextWithoutDiscussionQuery.java similarity index 90% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithoutDiscussionQuery.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/TextWithoutDiscussionQuery.java index 2eb967b..f80654e 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/querygen/qfactory/TextWithoutDiscussionQuery.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/querygen/qfactory/TextWithoutDiscussionQuery.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.phenopacket2prompt.querygen.qfactory; +package org.monarchinitiative.phenopacket2prompt.legacy.querygen.qfactory; import org.monarchinitiative.fenominal.core.TermMiner; import org.monarchinitiative.phenol.ontology.data.Ontology; diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParserTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParserTest.java new file mode 100644 index 0000000..40e72bf --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParserTest.java @@ -0,0 +1,32 @@ +package org.monarchinitiative.phenopacket2prompt.international; + +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.net.URL; +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; + +public class HpInternationalOboParserTest { + private static final String pbpoPath = "data/small-hp-international.obo"; + private static final ClassLoader classLoader = HpInternationalOboParserTest.class.getClassLoader(); + private static final URL resource = (classLoader.getResource(pbpoPath)); + private static final File file = new File(resource.getFile()); + private static final HpInternationalOboParser parser = new 
HpInternationalOboParser(file); + + @Test + public void testParser() { + assertNotNull(parser); + } + + @Test + public void testLanguage() { + String line = "name: 手劈裂 {source:value=\"Split hand\", translation:status=\"OFFICIAL\", source:language=\"en\", translation:language=\"zh\"}"; + Optional opt = parser.getLanguage(line); + assertTrue(opt.isPresent()); + String lang = opt.get(); + assertEquals("zh", lang); + } + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java index 0479f1f..fe562d9 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java @@ -3,11 +3,12 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; import org.monarchinitiative.phenol.ontology.data.TermId; -import org.monarchinitiative.phenopacket2prompt.model.ppkt.*; import java.io.File; import java.net.URL; +import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.function.Predicate; @@ -50,7 +51,7 @@ public void testPhenopacketAgeLastEncounter() { PhenopacketAge ppktAge = opt.get(); assertEquals(PhenopacketAgeType.ISO8601_AGE_TYPE, ppktAge.ageType()); String iso = ppktAge.age(); - assertEquals("P20Y", iso); + assertEquals("20 year-old", iso); } @Test @@ -60,7 +61,7 @@ public void testPhenopacketOnset() { PhenopacketAge onsetAge = opt.get(); assertEquals(PhenopacketAgeType.ISO8601_AGE_TYPE, onsetAge.ageType()); String iso = onsetAge.age(); - assertEquals("P5M", iso); + assertEquals("5 month-old", iso); } @Test @@ -76,10 +77,14 @@ public void testPhenopacketDisease() { @Test public void testPhenotypicFeatures() { - List ppktFeatures = ppktIndividual.getPhenotypicFeatures(); - assertFalse(ppktFeatures.isEmpty()); + Map> ppktFeatureMap = 
ppktIndividual.getPhenotypicFeatures(); + assertFalse(ppktFeatureMap.isEmpty()); Predicate termPredicate = term -> term.getLabel().equals("Cerebral atrophy"); - Optional opt = ppktFeatures.stream().filter(termPredicate).findAny(); + List otlist = new ArrayList<>(); + for (var list: ppktFeatureMap.values()) { + otlist.addAll(list); + } + Optional opt = otlist.stream().filter(termPredicate).findAny(); assertTrue(opt.isPresent()); OntologyTerm term = opt.get(); assertEquals("Cerebral atrophy", term.getLabel()); diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePointParserTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePointParserTest.java index 9aa3606..7007bd9 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePointParserTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/querygen/TimePointParserTest.java @@ -1,6 +1,8 @@ package org.monarchinitiative.phenopacket2prompt.querygen; import org.junit.jupiter.api.Test; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.TimePoint; +import org.monarchinitiative.phenopacket2prompt.legacy.querygen.TimePointParser; import java.util.List; diff --git a/src/test/resources/data/small-hp-international.obo b/src/test/resources/data/small-hp-international.obo new file mode 100644 index 0000000..26ad5b6 --- /dev/null +++ b/src/test/resources/data/small-hp-international.obo @@ -0,0 +1,224 @@ +format-version: 1.2 +data-version: hp/releases/2024-04-19/hp-international.owl +subsetdef: 1_STAR "" +subsetdef: 2_STAR "" +subsetdef: 3_STAR "" +subsetdef: abnormal_slim "" +subsetdef: absent_slim "" +subsetdef: attribute_slim "" +subsetdef: BDS_subset "" +subsetdef: blood_and_immune_upper_slim "" +subsetdef: cell_quality "" +subsetdef: cellxgene_subset "" +subsetdef: common_anatomy "" +subsetdef: cumbo "" +subsetdef: cyclostome_subset "" +subsetdef: defined_by_ordinal_series "" +subsetdef: developmental_classification "" 
+subsetdef: disposition_slim "" +subsetdef: early_development "" +subsetdef: efo_slim "" +subsetdef: emapa_ehdaa2 "" +subsetdef: eye_upper_slim "" +subsetdef: feed_aligned "" +subsetdef: functional_classification "" +subsetdef: general_cell_types_upper_slim "" +subsetdef: gocheck_do_not_annotate "" +subsetdef: gocheck_do_not_manually_annotate "" +subsetdef: goslim_agr "" +subsetdef: goslim_candida "" +subsetdef: goslim_chembl "" +subsetdef: goslim_drosophila "" +subsetdef: goslim_flybase_ribbon "" +subsetdef: goslim_generic "" +subsetdef: goslim_metagenomics "" +subsetdef: goslim_mouse "" +subsetdef: goslim_pir "" +subsetdef: goslim_plant "" +subsetdef: goslim_pombe "" +subsetdef: goslim_prokaryote "" +subsetdef: goslim_synapse "" +subsetdef: goslim_yeast "" +subsetdef: grouping_class "" +subsetdef: homology_grouping "" +subsetdef: hpo_slim "" +subsetdef: hposlim_core "Core clinical terminology" +subsetdef: http://purl.obolibrary.org/obo/valid_for_go_annotation_extension "" +subsetdef: http://purl.obolibrary.org/obo/valid_for_go_gp2term "" +subsetdef: http://purl.obolibrary.org/obo/valid_for_go_ontology "" +subsetdef: http://purl.obolibrary.org/obo/valid_for_gocam "" +subsetdef: human_reference_atlas "" +subsetdef: inconsistent_with_fma "" +subsetdef: kidney_upper_slim "" +subsetdef: location_grouping "" +subsetdef: major_organ "" +subsetdef: mpath_slim "" +subsetdef: non_informative "" +subsetdef: organ_slim "" +subsetdef: pheno_slim "" +subsetdef: phenotype_rcn "" +subsetdef: relational_slim "" +subsetdef: ro-eco "" +subsetdef: RO:0002259 "" +subsetdef: scalar_slim "" +subsetdef: secondary_consequence "Consequence of a disorder in another organ system." 
+subsetdef: uberon_slim "" +subsetdef: ubprop:upper_level "" +subsetdef: unverified_taxonomic_grouping "" +subsetdef: upper_level "" +subsetdef: value_slim "" +subsetdef: vertebrate_core "" +synonymtypedef: abbreviation "abbreviation" +synonymtypedef: BRAND_NAME "" +synonymtypedef: Gene-based "" +synonymtypedef: HP:0034334 "対立遺伝子:要求性" +synonymtypedef: INN "" +synonymtypedef: IUPAC_NAME "" +synonymtypedef: layperson "layperson term" +synonymtypedef: obsolete_synonym "discarded/obsoleted synonym" +synonymtypedef: OMO:0003000 "" +synonymtypedef: OMO:0003002 "" +synonymtypedef: OMO:0003004 "" +synonymtypedef: OMO:0003011 "" +synonymtypedef: plural_form "plural form" +synonymtypedef: PRO-short-label "" +synonymtypedef: syngo_official_label "" +synonymtypedef: systematic_synonym "" +synonymtypedef: uk_spelling "UK spelling" +default-namespace: human_phenotype +remark: Please see license of HPO at http://www.human-phenotype-ontology.org +ontology: hp/hp-international +property_value: http://purl.org/dc/elements/1.1/creator "Human Phenotype Ontology Consortium" xsd:string +property_value: http://purl.org/dc/elements/1.1/creator "Monarch Initiative" xsd:string +property_value: http://purl.org/dc/elements/1.1/creator "Peter Robinson" xsd:string +property_value: http://purl.org/dc/elements/1.1/creator "Sebastian Köhler" xsd:string +property_value: http://purl.org/dc/elements/1.1/description "The Human Phenotype Ontology (HPO) provides a standardized vocabulary of phenotypic abnormalities and clinical features encountered in human disease." 
xsd:string +property_value: http://purl.org/dc/elements/1.1/rights "Peter Robinson, Sebastian Koehler, The Human Phenotype Ontology Consortium, and The Monarch Initiative" xsd:string +property_value: http://purl.org/dc/elements/1.1/subject "Phenotypic abnormalities encountered in human disease" xsd:string +property_value: http://purl.org/dc/elements/1.1/title "Human Phenotype Ontology" xsd:string +property_value: http://purl.org/dc/terms/license https://hpo.jax.org/app/license +property_value: IAO:0000700 HP:0000001 +property_value: owl:versionInfo "2024-04-19" xsd:string +logical-definition-view-relation: has_part + +[Term] +id: BFO:0000002 +name: continuant +def: "An entity that exists in full at any time in which it exists at all, persists through time while maintaining its identity and has no temporal parts." [] +disjoint_from: BFO:0000003 ! occurrent +relationship: part_of BFO:0000002 {all_only="true"} ! continuant + + +[Term] +id: HP:0001166 +name: Arachnodactylie {translation:language="fr", source:value="Arachnodactyly", translation:status="OFFICIAL", https://w3id.org/babelon/comment="label of HP:0001166 (Arachnodactyly) ; abnormality of limbs ; abnormality of the skeletal system", source:language="en"} +name: Arachnodactylie {translation:status="CANDIDATE", source:value="Arachnodactyly", source:language="en", translation:language="nl"} +name: Arachnodactyly +name: Arachnodaktylie {source:value="Arachnodactyly", translation:status="OFFICIAL", translation:language="cs", source:language="en"} +name: Aracnodactilia {source:value="Arachnodactyly", translation:status="OFFICIAL", source:language="en", translation:language="es"} +name: Araknodaktili {source:value="Arachnodactyly", translation:status="OFFICIAL", source:language="en", translation:language="tr"} +name: くも指 {https://w3id.org/babelon/source="http://purl.obolibrary.org/obo/hp.owl", translation:date="2023-07-31", translation:precision="EXACT", source:value="Arachnodactyly", translation:confidence="1.0", 
translation:language="ja", translation:status="OFFICIAL", source:language="en", source:version="http://purl.obolibrary.org/obo/hp/releases/2023-07-21/hp.owl"} +name: 细长指(趾) {source:value="Arachnodactyly", translation:status="OFFICIAL", source:language="en", translation:language="zh"} +alt_id: HP:0001505 +def: "\"Abnormálně dlouhé a štíhlé prsty (\"\"pavoučí prsty\"\")\"" [] {source:value="\"Abnormally long and slender fingers (\"\"spider fingers\"\").\"", translation:status="OFFICIAL", translation:language="cs", source:language="en"} +def: "\"Anormal uzun ve ince parmaklar (\"\"örümcek parmaklar\"\")\"" [] {source:value="\"Abnormally long and slender fingers (\"\"spider fingers\"\").\"", translation:status="OFFICIAL", source:language="en", translation:language="tr"} +def: "Abnormally long and slender fingers (\"spider fingers\")." [https://orcid.org/0000-0002-0736-9199] +synonym: "Dlouhé prsty" EXACT [] +synonym: "Long slender fingers" EXACT layperson [] +synonym: "Long slender fingers" EXACT [] +synonym: "Long slender fingers|Long, slender fingers|Spider fingers" EXACT [] +synonym: "Long, slender fingers" EXACT [] +synonym: "Pavoučí prsty" EXACT [] +synonym: "Spider fingers" EXACT [] +synonym: "Spider fingers" EXACT layperson [] +synonym: "Štíhlé prsty" EXACT [] +xref: MSH:D054119 +xref: SNOMEDCT_US:62250003 +xref: UMLS:C0003706 +is_a: HP:0001238 ! Nsatea εyε teatea/tratra sen nipadua no size +is_a: HP:0100807 ! 
長い指 + +[Term] +id: HP:0001167 +name: Abnormal finger morphology +name: Abnormalita prstu {translation:status="CANDIDATE", translation:language="cs", source:language="en", source:value="Abnormal finger morphology"} +name: Afwijking van vinger {translation:status="CANDIDATE", source:language="en", source:value="Abnormal finger morphology", translation:language="nl"} +name: Anomalie des doigts {translation:language="fr", https://w3id.org/babelon/comment="label of HP:0001167 (Abnormality of finger) ; abnormality of limbs ; abnormality of the skeletal system", translation:status="CANDIDATE", source:language="en", source:value="Abnormal finger morphology"} +name: Anomalía del dedo de la mano {translation:status="CANDIDATE", source:language="en", source:value="Abnormal finger morphology", translation:language="es"} +name: Parmak anormalliği {translation:status="CANDIDATE", source:language="en", source:value="Abnormal finger morphology", translation:language="tr"} +name: 手指异常 {translation:status="CANDIDATE", source:language="en", source:value="Abnormal finger morphology", translation:language="zh"} +name: 指の異常 {https://w3id.org/babelon/source="http://purl.obolibrary.org/obo/hp.owl", translation:date="2023-07-31", translation:precision="EXACT", translation:confidence="1.0", translation:language="ja", translation:status="OFFICIAL", source:language="en", source:value="Abnormal finger morphology", source:version="http://purl.obolibrary.org/obo/hp/releases/2023-07-21/hp.owl"} +alt_id: HP:0003035 +def: "An anomaly of a finger." 
[https://orcid.org/0000-0002-0736-9199] +def: "Anomálie prstu" [] {translation:status="OFFICIAL", source:value="An anomaly of a finger.", translation:language="cs", source:language="en"} +def: "Parmağın bir anomalisi" [] {translation:status="OFFICIAL", source:value="An anomaly of a finger.", source:language="en", translation:language="tr"} +synonym: "Abnormalities of the fingers" EXACT [] +synonym: "Abnormalities of the fingers" EXACT layperson [] +synonym: "Abnormality of finger" EXACT layperson [] +synonym: "Abnormality prstů" EXACT [] +xref: UMLS:C2674737 +is_a: HP:0001155 ! Afwijking van de hand +is_a: HP:0011297 ! Afwijking van vinger/teen + +[Term] +id: HP:0001169 +name: Brede palm {translation:status="CANDIDATE", source:value="Broad palm", source:language="en", translation:language="nl"} +name: Broad palm +name: Geniş avuç {source:value="Broad palm", translation:status="OFFICIAL", source:language="en", translation:language="tr"} +name: Mains larges {translation:language="fr", https://w3id.org/babelon/comment="label of HP:0001169 (Broad palm) ; abnormality of limbs", source:value="Broad palm", translation:status="OFFICIAL", source:language="en"} +name: Palma de la mano ancha {source:value="Broad palm", translation:status="OFFICIAL", source:language="en", translation:language="es"} +name: Široká dlaň {source:value="Broad palm", translation:status="OFFICIAL", translation:language="cs", source:language="en"} +name: 宽掌 {source:value="Broad palm", translation:status="OFFICIAL", source:language="en", translation:language="zh"} +name: 幅広い手掌 {https://w3id.org/babelon/source="http://purl.obolibrary.org/obo/hp.owl", translation:date="2023-07-31", translation:precision="EXACT", translation:confidence="1.0", translation:language="ja", source:value="Broad palm", translation:status="OFFICIAL", source:language="en", source:version="http://purl.obolibrary.org/obo/hp/releases/2023-07-21/hp.owl"} +alt_id: HP:0001174 +def: "For children from birth to 4 years of age the palm 
width is more than 2 SD above the mean; for children from 4 to 16 years of age the palm width is above the 95th centile; or, the width of the palm appears disproportionately wide for the length." [https://orcid.org/0000-0002-0736-9199, PMID:19125433] +def: "Pro děti od narození do 4 let věku šířka dlaně větší než 2 SD; pro děti od 4 do 16 let šířka dlaně nad 95. percentilem; nebo očividně disproporční šířka dlaně k délku" [] {source:value="For children from birth to 4 years of age the palm width is more than 2 SD above the mean; for children from 4 to 16 years of age the palm width is above the 95th centile; or, the width of the palm appears disproportionately wide for the length.", translation:status="OFFICIAL", translation:language="cs", source:language="en"} +def: "Çocuklarda doğumdan 4 yaşına kadar avuç genişliğinin ortalamanın 2 SD üzerinde olmasıdır; 4 yaşından 16 yaşında kadar avuç genişliğinin 95. yüzdelik dilimin üzerinde olmasıdır; veya avuç genişliğinin uzunluğuna göre orantısız olarak görünmesi" [] {source:value="For children from birth to 4 years of age the palm width is more than 2 SD above the mean; for children from 4 to 16 years of age the palm width is above the 95th centile; or, the width of the palm appears disproportionately wide for the length.", translation:status="OFFICIAL", source:language="en", translation:language="tr"} +comment: Hand width is measured across the palm at the level of the MCPJ (radial aspect of the second MCPJ to the ulnar aspect of the fifth MCPJ) [Hall et al., 2007]. Caution is advised with the subjective assessment as short metacarpals can mimic a broad palm. In persons with polydactyly that includes a supernumerary metacarpal, that should be separately coded and the measurement technique from Hall et al, [2007] would need to be modified to account for the supernumerary digit (i.e., with postaxial polydactyly, measure to the sixth MCPJ). 
+synonym: "Broad hand" EXACT layperson [] +synonym: "Broad hand" EXACT [] +synonym: "Broad hands" EXACT [] +synonym: "Broad hands" EXACT layperson [https://orcid.org/0000-0002-5316-1399] +synonym: "Broad palm" EXACT layperson [] +synonym: "Mains larges" EXACT [] +synonym: "mains larges et courtes" EXACT [] +synonym: "Wide palm" EXACT [] +synonym: "Wide palm" EXACT layperson [] +synonym: "Široké dlaně" EXACT [] +synonym: "Široké ruce" EXACT [] +xref: SNOMEDCT_US:48251003 +xref: UMLS:C0264142 +is_a: HP:0100871 ! Afwijking van de palm +intersection_of: PATO:0000600 ! increased width +intersection_of: inheres_in UBERON:0008878 ! palmar part of manus +intersection_of: qualifier PATO:0000460 ! abnormal + +[Term] +id: HP:0001171 +name: Ectrodactylie {translation:status="CANDIDATE", source:value="Split hand", source:language="en", translation:language="nl"} +name: Ectrodactylie {https://w3id.org/babelon/comment="label of HP:0001171 (Split hand) ; abnormality of limbs ; abnormality of the skeletal system", translation:language="fr", source:value="Split hand", translation:status="OFFICIAL", source:language="en"} +name: Laar Parirr {source:value="Split hand", translation:status="OFFICIAL", source:language="en", translation:language="nna"} +name: Mano hendida {source:value="Split hand", translation:status="OFFICIAL", source:language="en", translation:language="es"} +name: Nsatea baako,mmienu anaa deε boro saa akabom baako na agya kokurobeti no koraa {source:value="Split hand", translation:status="OFFICIAL", translation:language="tw", source:language="en"} +name: Rozštěp ruky {source:value="Split hand", translation:status="OFFICIAL", translation:language="cs", source:language="en"} +name: Split hand +name: Yarık el {source:value="Split hand", translation:status="OFFICIAL", source:language="en", translation:language="tr"} +name: 手劈裂 {source:value="Split hand", translation:status="OFFICIAL", source:language="en", translation:language="zh"} +name: 裂手 
{https://w3id.org/babelon/source="http://purl.obolibrary.org/obo/hp.owl", translation:date="2023-07-31", translation:precision="EXACT", source:value="Split hand", translation:confidence="1.0", translation:language="ja", translation:status="OFFICIAL", source:language="en", source:version="http://purl.obolibrary.org/obo/hp/releases/2023-07-21/hp.owl"} +alt_id: HP:0001247 +alt_id: HP:0003050 +def: "A condition in which middle parts of the hand (fingers and metacarpals) are missing giving a cleft appearance. The severity is very variable ranging from slightly hypoplastic middle fingers over absent middel fingers as far as oligo- or monodactyl hands." [https://orcid.org/0009-0006-4530-3154] +def: "Stav, kdy chybí střední části ruky (prsty na ruce a metakarlální kosti), což vypadá jako rozštěp. Závažnost je velmi variabilní od mírně hypoplastických středních prstů přes chybění středních prstů nebo až oligo- nebo monodaktylické ruce." [] {source:value="A condition in which middle parts of the hand (fingers and metacarpals) are missing giving a cleft appearance. The severity is very variable ranging from slightly hypoplastic middle fingers over absent middel fingers as far as oligo- or monodactyl hands.", translation:status="OFFICIAL", translation:language="cs", source:language="en"} +def: "Yarık görünümü veren elin orta kısımlarının (parmaklar ve metakarpaller) eksik olması durumudur. Hafif hipoplastik orta parmaktan, orta parmak yokluğuna hatta oligo-monodaktiliye kadar ciddiyeti oldukça değişkendir" [] {source:value="A condition in which middle parts of the hand (fingers and metacarpals) are missing giving a cleft appearance. 
The severity is very variable ranging from slightly hypoplastic middle fingers over absent middel fingers as far as oligo- or monodactyl hands.", translation:status="OFFICIAL", source:language="en", translation:language="tr"} +synonym: "Ectrodactyly of the hand" EXACT [] +synonym: "Ektrodaktylie ruky" EXACT [] +synonym: "Hand ectrodactyly" EXACT [] +synonym: "Main fendue" EXACT [] +synonym: "Rozštěpená ruka" EXACT [] +synonym: "Split hand" EXACT layperson [] +synonym: "Split-hand" EXACT layperson [] +synonym: "Split-hand" EXACT [] +xref: SNOMEDCT_US:299034005 +xref: UMLS:C0221373 +is_a: HP:0005922 ! Anomalie de la morphologie de la main +is_a: HP:0100257 ! Ektrodaktylie \ No newline at end of file From 308eca476f0a17df236861b01b52784399f3bca2 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 24 Apr 2024 12:38:46 +0200 Subject: [PATCH 5/6] spanish --- pom.xml | 2 +- .../cmd/GptTranslateCommand.java | 21 ++++ .../cmd/OntoGptCommand.java | 1 - .../international/HpInternational.java | 10 ++ .../HpInternationalOboParser.java | 17 ++- .../phenopacket2prompt/model/Iso8601Age.java | 1 - .../model/PpktIndividual.java | 2 +- .../PpktPhenotypicFeatureGenerator.java | 1 - .../output/PromptGenerator.java | 28 +++-- .../{ => english}/EnglishPromptGenerator.java | 4 +- .../english/PpktPhenotypicfeatureEnglish.java | 1 - .../impl/spanish/PhenopacketSexSpanish.java | 58 ++++++++++ .../output/impl/spanish/PpktAgeSpanish.java | 45 ++++++++ .../spanish/PpktPhenotypicfeatureSpanish.java | 52 +++++++++ .../output/impl/spanish/PpktTextSpanish.java | 38 +++++++ .../impl/spanish/SpanishPromptGenerator.java | 100 ++++++++++++++++++ 16 files changed, 360 insertions(+), 21 deletions(-) rename src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/{ => english}/EnglishPromptGenerator.java (96%) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PhenopacketSexSpanish.java create mode 100644 
src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSpanish.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java diff --git a/pom.xml b/pom.xml index 01800b4..4b871bf 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.monarchinitiative phenopacket2prompt - 0.3.10 + 0.3.11 phenopacket2prompt https://github.com/monarch-initiative/phenopacket2prompt diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index ccd4f7d..68cd748 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -3,6 +3,8 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.international.HpInternationalOboParser; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; import org.slf4j.Logger; @@ -10,6 +12,7 @@ import picocli.CommandLine; import java.io.File; +import java.util.Map; import java.util.concurrent.Callable; @@ -24,6 +27,10 @@ public class GptTranslateCommand implements Callable { description = "path to HP json file") private String hpoJsonPath = "data/hp.json"; + @CommandLine.Option(names = {"--translations"}, + description = "path to translations 
file") + private String translationsPath = "data/hp-international.obo"; + @CommandLine.Option(names = {"-p", "--ppkt"}, description = "Path to JSON phenopacket file", required = true) private String ppkt; @@ -36,11 +43,25 @@ public Integer call() throws Exception { } Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); LOGGER.info("HPO version {}", hpo.version().orElse("n/a")); + File translationsFile = new File(translationsPath); + if (! translationsFile.isFile()) { + System.err.printf("Could not find translations file at %s. Try download command", translationsPath); + return 1; + } + HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); + Map internationalMap = oboParser.getLanguageToInternationalMap(); + LOGGER.info("Got {} translations", internationalMap.size()); + + System.out.println(hpo.version().orElse("n/a")); PromptGenerator generator = PromptGenerator.english(hpo); PpktIndividual individual = new PpktIndividual(new File(ppkt)); String prompt = generator.createPrompt(individual); System.out.println(prompt); + System.out.println("SPANISH"); + PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); + prompt = spanish.createPrompt(individual); + System.out.println(prompt); return 0; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java index 8e6c075..2cd9ff0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java @@ -8,7 +8,6 @@ import org.monarchinitiative.phenopacket2prompt.legacy.querygen.QueryOutputGenerator; import org.monarchinitiative.phenopacket2prompt.legacy.querygen.QueryOutputType; import org.monarchinitiative.phenopacket2prompt.legacy.querygen.QueryPromptFactory; -import org.monarchinitiative.phenopacket2prompt.querygen.*; import 
picocli.CommandLine; import java.io.*; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternational.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternational.java index 2823dc7..bcff3b3 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternational.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternational.java @@ -3,6 +3,7 @@ import org.monarchinitiative.phenol.ontology.data.TermId; import java.util.Map; import java.util.HashMap; +import java.util.Optional; public class HpInternational { @@ -26,4 +27,13 @@ public String getLanguageAcronym() { public Map getTermIdToLabelMap() { return termIdToLabelMap; } + + public Optional getLabel(TermId hpoId) { + if (termIdToLabelMap.containsKey(hpoId)) { + return Optional.of(termIdToLabelMap.get(hpoId)); + } else { + return Optional.empty(); + } + } + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java index 815b85e..52a4824 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java @@ -38,6 +38,18 @@ public Optional getLanguage(String annots) { } } + public Optional getTranslation(String annots) { + final String translation = "translation:language=\"(\\w{2,2})\""; + final Pattern pattern = Pattern.compile(translation); + Matcher matcher = pattern.matcher(annots); + if (matcher.find()) { + String language = matcher.group(1); + return Optional.of(language); + } else { + return Optional.empty(); + } + } + public HpInternationalOboParser(File file) { languageToInternationalMap = new HashMap<>(); String pattern = "id: (HP:\\d{7,7})"; @@ -56,7 +68,7 @@ public 
HpInternationalOboParser(File file) { if (matcher.find()) { currentHpoTermId = TermId.of(matcher.group(1)); inHpTerm = true; - System.out.println(currentHpoTermId.getValue()); + //System.out.println(currentHpoTermId.getValue()); } else if (inHpTerm) { if (line.isEmpty()) { inHpTerm = false; @@ -64,17 +76,14 @@ public HpInternationalOboParser(File file) { line = line.substring(5); String [] fields = line.split("\\{"); if (fields.length == 1) { - System.out.println("English! " + fields[0]); String hpoLabel = fields[0].trim(); languageToInternationalMap.get(ENGLISH).addTerm(currentHpoTermId, hpoLabel); } else if (fields.length == 2) { - System.out.println("OTHER! " + fields[0] + "--" + fields[1]); String hpoLabel = fields[0].trim(); String annots = fields[1]; Optional opt = getLanguage(annots); if (opt.isPresent()) { String language = opt.get(); - System.out.println(language); languageToInternationalMap.get(language).addTerm(currentHpoTermId, hpoLabel); } else { System.err.printf("[ERROR] Could not extract language for %s.", line); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java index 0c51307..1692624 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java @@ -36,7 +36,6 @@ public Iso8601Age(String iso) { i = ageString.indexOf("D"); if (i>0) { days = Integer.parseInt(ageString.substring(0,i)); - ageString = ageString.substring(i+1); } else { days = 0; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java index 6857244..24ffece 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java @@ 
-17,7 +17,7 @@ import java.util.*; public class PpktIndividual { - Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); + final Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); private final Phenopacket ppkt; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java index dea0895..f7a65b5 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java @@ -1,7 +1,6 @@ package org.monarchinitiative.phenopacket2prompt.output; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import java.util.List; import java.util.function.Predicate; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index 4fb5cc7..fedea76 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -1,16 +1,20 @@ package org.monarchinitiative.phenopacket2prompt.output; import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -import org.monarchinitiative.phenopacket2prompt.output.impl.EnglishPromptGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.english.EnglishPromptGenerator; import org.monarchinitiative.phenopacket2prompt.output.impl.english.PhenopacketSexEnglish; import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktAgeEnglish; import 
org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktPhenotypicfeatureEnglish; import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktTextEnglish; +import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.*; public interface PromptGenerator { + + String queryHeader(); String getIndividualInformation(PpktIndividual ppktIndividual); String getPhenotypicFeatures(PpktIndividual ppktIndividual); @@ -25,16 +29,24 @@ public static PromptGenerator english(Ontology ontology){ return new EnglishPromptGenerator(ontology, sgen, page, ptext, pfgen); } - + static PromptGenerator spanish(Ontology hpo, HpInternational international) { + PhenopacketSexGenerator sgen = new PhenopacketSexSpanish(); + PhenopacketAgeGenerator page = new PpktAgeSpanish(); + PhenopacketTextGenerator ptext = new PpktTextSpanish(); + PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureSpanish(international); + return new SpanishPromptGenerator(hpo, sgen, page, ptext, pfgen); + } default String createPrompt(PpktIndividual individual) { - StringBuilder sb = new StringBuilder(); - sb.append(queryHeader()); - sb.append(getIndividualInformation(individual)); - sb.append(getPhenotypicFeatures(individual)); + String sb = queryHeader() + + getIndividualInformation(individual) + + getPhenotypicFeatures(individual); + return sb; + } + + + - return sb.toString(); - } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/EnglishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java similarity index 96% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/EnglishPromptGenerator.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java index 9df0cba..e8c1cf6 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/EnglishPromptGenerator.java +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java @@ -1,14 +1,12 @@ -package org.monarchinitiative.phenopacket2prompt.output.impl; +package org.monarchinitiative.phenopacket2prompt.output.impl.english; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.*; import java.util.*; -import java.util.stream.Collectors; public class EnglishPromptGenerator implements PromptGenerator { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java index b2ef31a..2add8ed 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java @@ -1,7 +1,6 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.english; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; import java.util.List; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PhenopacketSexSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PhenopacketSexSpanish.java new file mode 100644 index 0000000..6a3298e --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PhenopacketSexSpanish.java @@ -0,0 
+1,58 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; + +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketSexGenerator; + +import java.util.Optional; + +public class PhenopacketSexSpanish implements PhenopacketSexGenerator { + @Override + public String ppktSex(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "female"; + case MALE -> "male"; + default -> "individual"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "girl"; + case MALE -> "boy"; + default -> "child"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "female newborn"; + case MALE -> "male newborn"; + default -> "newborn"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "female fetus"; + case MALE -> "male fetus"; + default -> "fetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "female infant"; + case MALE -> "male infant"; + default -> "infant"; + }; + } else { + return switch (psex) { + case FEMALE -> "woman"; + case MALE -> "man"; + default -> "individual"; + }; + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSpanish.java new file mode 100644 index 0000000..9e64106 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSpanish.java @@ -0,0 +1,45 @@ +package 
org.monarchinitiative.phenopacket2prompt.output.impl.spanish; + +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAgeType; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketAgeGenerator; + +public class PpktAgeSpanish implements PhenopacketAgeGenerator { + @Override + public String age(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return ppktAge.age() + " old"; + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "bebé"; + case "Childhood onset" -> "niño"; + case "Neonatal onset" -> "neonate"; + case "Congenital onset" -> "recién nacido"; + case "Adult onset" -> "adulto"; + default-> String.format("During the %s", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return "A la edad de " + ppktAge.age(); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "Durante el periodo infantil"; + case "Childhood onset" -> "Durante la infancia"; + case "Neonatal onset" -> "Durante el periodo neonatal"; + case "Congenital onset" -> "Al nacer"; + case "Adult onset" -> "Como adulto"; + default-> String.format("Durante el %s periodo", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java new 
file mode 100644 index 0000000..43e6ce5 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java @@ -0,0 +1,52 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; + +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.function.Predicate; + +public class PpktPhenotypicfeatureSpanish implements PpktPhenotypicFeatureGenerator { + + private final HpInternational spanish; + + public PpktPhenotypicfeatureSpanish(HpInternational international) { + spanish = international; + } + + + + private List getTranslations(List ontologyTerms) { + List labels = new ArrayList<>(); + for (var term: ontologyTerms) { + Optional opt = spanish.getLabel(term.getTid()); + if (opt.isPresent()) { + labels.add(opt.get()); + } else { + System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", spanish.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + } + } + return labels; + } + + + + @Override + public String featureList(List ontologyTerms) { + List terms = ontologyTerms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)).toList(); + List labels = getTranslations(terms); + return getOxfordCommaList(labels, "y"); + } + + @Override + public String excludedFeatureList(List ontologyTerms) { + List terms = ontologyTerms.stream() + .filter(OntologyTerm::isExcluded).toList(); + List labels = getTranslations(terms); + return getOxfordCommaList(labels, "y"); + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java new file mode 100644 
index 0000000..3ec19bf --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java @@ -0,0 +1,38 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; + +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; + +public class PpktTextSpanish implements PhenopacketTextGenerator { + + @Override + public String QUERY_HEADER() { + return """ +Estoy realizando un experimento en una conferencia de casos clinicopatológicos para ver cómo sus diagnósticos\s +se comparan con los de los expertos humanos. Les voy a dar parte de un caso médico. Estos han sido\s +todos han sido publicados en el New England Journal of Medicine. Usted no está tratando a ningún paciente. +Cuando lea el caso, observará que hay expertos que exponen sus opiniones.\s +En este caso, usted es el "Dr. GPT-4", un modelo de lenguaje Al que está discutiendo el caso junto con expertos humanos.\s +expertos humanos. Una conferencia clinicopatológica tiene varias reglas tácitas. La primera es\s +que la mayoría de las veces hay un único diagnóstico definitivo (aunque rara vez puede haber más de uno), +y se trata de un diagnóstico que hoy se sabe que existe en humanos. El diagnóstico casi siempre se\s +confirmado mediante algún tipo de prueba de patología clínica o anatomopatológica, aunque en\s +casos raros en los que no existe una prueba de este tipo para un diagnóstico, éste puede\s +diagnóstico puede realizarse mediante criterios clínicos validados o, en muy raras ocasiones, simplemente confirmarse mediante la opinión de un experto.\s +Al final de la descripción del caso se le indicará si se solicita alguna prueba o pruebas diagnósticas.\s +diagnósticas, que puede suponer que harán el diagnóstico o diagnósticos. Después de leer el caso\s +quiero que des dos datos. El primer dato es su diagnóstico o diagnósticos más probables.\s +diagnóstico/diagnósticos. 
El objetivo es obtener la respuesta correcta, no una amplia categoría de respuestas.\s +correcta, no una amplia categoría de respuestas. No es necesario que explique su razonamiento.\s +el/los diagnóstico/s. El segundo dato es dar un diagnóstico diferencial sólido,\s +ordenados por su probabilidad, de modo que el diagnóstico más probable esté arriba y el menos probable, abajo.\s +esté en la parte inferior. El número de diagnósticos diferenciales es ilimitado. Puede dar\s +Puede dar tantos diagnósticos como considere razonables. No es necesario que explique su razonamiento,\s +sólo enumere los diagnósticos. De nuevo, el objetivo es ser lo más específico posible con cada uno de los\s +diagnósticos.\s +¿Tiene alguna pregunta, Dr. GPT-4? + +Este es el caso: +"""; + } + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java new file mode 100644 index 0000000..0c99471 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java @@ -0,0 +1,100 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.*; + +import java.util.*; + +public class SpanishPromptGenerator implements PromptGenerator { + + private final Ontology hpo; + + private final PhenopacketSexGenerator sexGenerator; + + private final PhenopacketAgeGenerator ppktAgeGenerator; + + private final PhenopacketTextGenerator ppktTextGenerator; + + private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; + + + + public 
SpanishPromptGenerator(Ontology hpo, PhenopacketSexGenerator sgen, PhenopacketAgeGenerator page, PhenopacketTextGenerator ptext, PpktPhenotypicFeatureGenerator pfgen) { + this.hpo = hpo; + sexGenerator = sgen; + ppktAgeGenerator = page; + ppktTextGenerator = ptext; + this.ppktPhenotypicFeatureGenerator = pfgen; + } + + @Override + public String queryHeader() { + return ppktTextGenerator.QUERY_HEADER(); + } + + @Override + public String getIndividualInformation(PpktIndividual ppktIndividual) { + StringBuilder sb = new StringBuilder(); + String sex = sexGenerator.ppktSex(ppktIndividual); + Optional lastAgeOpt = ppktIndividual.getAgeAtLastExamination(); + Optional onsetOpt = ppktIndividual.getAgeAtOnset(); + if (lastAgeOpt.isPresent()) { + PhenopacketAge lastExamAge = lastAgeOpt.get(); + String examAge = ppktAgeGenerator.age(lastExamAge); + sb.append("El probando era un ").append(examAge).append( " ").append(sex).append(". "); + } else { + sb.append("El probando era un ").append(sex).append(". "); + } + if (onsetOpt.isPresent()) { + PhenopacketAge onsetAge = onsetOpt.get(); + String onset = ppktAgeGenerator.age(onsetAge); + sb.append("Las manifestaciones iniciales de la enfermedad aparecieron cuando el probando era ").append(onset).append(". "); + } + return sb.toString(); + } + + @Override + public String getPhenotypicFeatures(PpktIndividual ppktIndividual) { + StringBuilder sb = new StringBuilder(); + Map> termMap = ppktIndividual.getPhenotypicFeatures(); + List ageList = new ArrayList<>(termMap.keySet()); + Collections.sort(ageList,(a, b) -> Integer.compare(a.totalDays(), b.totalDays())); + for (var age: ageList) { + List terms = termMap.get(age); + if (! age.specified()) { + if (termMap.size() > 1) { + // if size is greater than one, there was at least one specified time point + if (ppktPhenotypicFeatureGenerator.hasObservedFeatures(terms)) { + sb.append("Características adicionales comprendían").append(ppktPhenotypicFeatureGenerator.featureList(terms)).append(". 
"); + } + if (ppktPhenotypicFeatureGenerator.hasExcludedFeatures(terms)) { + sb.append("Otras características excluidas fueron ").append(ppktPhenotypicFeatureGenerator.excludedFeatureList(terms)).append(". "); + } + } else { + if (ppktPhenotypicFeatureGenerator.hasObservedFeatures(terms)) { + sb.append("Se observaron las siguientes manifestaciones clínicas: ").append(ppktPhenotypicFeatureGenerator.featureList(terms)).append(". "); + } + if (ppktPhenotypicFeatureGenerator.hasExcludedFeatures(terms)) { + sb.append("Se excluyeron las siguientes manifestaciones clínicas: ").append(ppktPhenotypicFeatureGenerator.excludedFeatureList(terms)).append(". "); + } + } + } else { + String ageString = ppktAgeGenerator.age(age); + + if (ppktPhenotypicFeatureGenerator.hasObservedFeatures(terms)) { + sb.append(ageString).append(", se observaron las siguientes manifestaciones clínicas: ").append(ppktPhenotypicFeatureGenerator.featureList(terms)).append(". "); + } + if (ppktPhenotypicFeatureGenerator.hasExcludedFeatures(terms)) { + sb.append(ageString).append(", se excluyeron las siguientes manifestaciones clínicas: ").append(ppktPhenotypicFeatureGenerator.excludedFeatureList(terms)).append(". 
"); + } + } + } + + return sb.toString(); + } + + +} From a06801e4eba5c95e9c67b6740123c8f2ba47cf61 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 25 Apr 2024 14:51:51 +0200 Subject: [PATCH 6/6] documentation --- .github/workflows/documentation.yml | 40 +-- docs/Makefile | 20 -- docs/conf.py | 119 -------- docs/english.md | 0 docs/index.md | 72 +++++ docs/index.rst | 90 ------ docs/languages.md | 46 +++ mkdocs.yml | 65 ++++ .../output/IndividualInformation.java | 13 + .../output/PhenopacketAgeGenerator.java | 12 - .../output/PhenopacketAgeSexGenerator.java | 16 + .../output/PhenopacketSexGenerator.java | 11 - .../output/PromptGenerator.java | 16 +- .../impl/english/EnglishPromptGenerator.java | 18 +- .../impl/english/PhenopacketSexEnglish.java | 58 ---- .../output/impl/english/PpktAgeEnglish.java | 45 --- .../impl/english/PpktAgeSexEnglish.java | 110 +++++++ .../impl/spanish/PhenopacketSexSpanish.java | 58 ---- .../impl/spanish/PpktAgeSexSpanish.java | 277 ++++++++++++++++++ .../output/impl/spanish/PpktAgeSpanish.java | 45 --- .../impl/spanish/SpanishPromptGenerator.java | 20 +- 21 files changed, 634 insertions(+), 517 deletions(-) delete mode 100644 docs/Makefile delete mode 100644 docs/conf.py create mode 100644 docs/english.md create mode 100644 docs/index.md delete mode 100644 docs/index.rst create mode 100644 docs/languages.md create mode 100644 mkdocs.yml create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/IndividualInformation.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeGenerator.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeSexGenerator.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketSexGenerator.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PhenopacketSexEnglish.java delete mode 100644 
src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeEnglish.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeSexEnglish.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PhenopacketSexSpanish.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSexSpanish.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSpanish.java diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index f1a7f6b..48983c6 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -1,35 +1,25 @@ -name: Sphinx Documentation +name: mkdocs-generation on: push: - branches: [ main ] - + branches: + - main +permissions: + contents: write jobs: - build-docs: + deploy: runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@main - with: - ref: ${{ github.ref }} + - uses: actions/checkout@v3 - - name: Set up Python 3. - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: 3.x - - name: install - run: pip3 install sphinx sphinx-rtd-theme click ddt pandas setuptools + - uses: actions/cache@v2 + with: + key: ${{ github.ref }} + path: .cache - - name: Build documentation. - run: | - cd docs/ - sphinx-build -b html . _build - touch _build/.nojekyll + - run: python3 -m pip install .[docs] - - name: Deploy documentation. 
- if: ${{ github.event_name == 'push' }} - uses: JamesIves/github-pages-deploy-action@v4.3.0 - with: - branch: gh-pages - clean: true - folder: docs/_build + - run: mkdocs gh-deploy --force diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index ea6c3c2..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SPHINXPROJ = phenopacket2promot -SOURCEDIR = . -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index b9afeb1..0000000 --- a/docs/conf.py +++ /dev/null @@ -1,119 +0,0 @@ -# -*- coding: utf-8 -*- -# -# VPV documentation build configuration file, created by -# sphinx-quickstart on Sun Sep 24 12:02:05 2017. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = ['sphinx.ext.mathjax'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'phenopacket2prompt' -copyright = u'2023' -author = u'Peter Robinson' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = u'0.2' -# The full version, including alpha/beta/rc tags. -release = u'0.2.0' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = 'en' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# If true, `todo` and `todoList` produce output, else they produce nothing. 
-todo_include_todos = False - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] -# html_style = 'css/isopret.css' -html_css_files = ['style.css'] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars -html_sidebars = { - '**': [ - 'about.html', - 'navigation.html', - 'relations.html', # needs 'show_related': True theme option to display - 'searchbox.html', - 'donate.html', - ] -} - -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. -htmlhelp_basename = 'phenopacket2prompt' - - diff --git a/docs/english.md b/docs/english.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..7fa954b --- /dev/null +++ b/docs/index.md @@ -0,0 +1,72 @@ +# Phenopacket2Prompt + + +phenopacket2prompt is a Java 17 application that creates prompts intended for use with GPT starting from +GA4GH phenopackets. + + + + + +## Installation + + +Most users should download the prebuilt executable file from the +[Releases](https://github.com/monarch-initiative/phenopacket2prompt/releases) page of the GitHub repository. 
+ +It is also possible to build the application from source using standard Maven and Java tools. + +```shell title="building the app" +git clone https://github.com/monarch-initiative/phenopacket2prompt.git +cd phenopacket2prompt +mvn package +java -jar target/phenopacket2prompt.jar +``` + +## Setup + + +First download the latest copy of the [Human Phenotype Ontology](https://hpo.jax.org/app/) hp.json file. This file is +used for text mining of clinical signs and symptoms. For more information about the HPO, see +[Koehler et al. (2021)](https://pubmed.ncbi.nlm.nih.gov/33264411/). Adjust the path to the `phenopacket2prompt.jar` +file as necessary. + + + +```shell title="download" +java -jar phenopacket2prompt.jar download +``` + + + + +## Running phenopacket2prompt + + +Assuming the hp.json file has been downloaded as described above and all of the case report text files +are available in a directory at ``some/path/gptdocs``, run + + +```shell title="running the app" +java -jar phenopacket2prompt.jar gpt -g some/path/gptdocs +``` + + + +This command will create a new directory called ``gptOut`` (this can be adjusted using the -o option). +It will contain four subdirectories: + +1. phenopackets. GA4GH phenopackets derived from each case report +2. phenopacket_based_queries. Feature-based query prompts for GPT-4 based on the information in the phenopackets +3. txt_without_discussion. Original query based on the original case report with text as presented by the first discussant up to but not including text contributed by the second discussant or any following text +4. txt_with_differential. Text that starts with the presentation by the first discussant up to and including the differential. This was used to check parsing but was not used in our analysis. + + + + + +### Feedback + + +The best place to leave feedback, ask questions, and report bugs is the [phenopacket2prompt Issue Tracker](https://github.com/monarch-initiative/phenopacket2prompt/issues). 
+ diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 28ae757..0000000 --- a/docs/index.rst +++ /dev/null @@ -1,90 +0,0 @@ -.. _home: - -================== -Phenopacket2Prompt -================== - -phenopacket2promot is a Java 17 application that performs text mining on files that have been extracted -from New England Journal of Medicine (NEJM) `Case Challenges `_, i.e., -NEJM Case Records of the Massachusetts General Hospital. - - - - - -Installation -^^^^^^^^^^^^ - -Most users should download the prebuilt executable file from the -`Releases `_ page of the GutHub repository. - -It is also possible to build the application from source using standard Maven and Java tools. - -.. code-block:: shell - :linenos: - - git clone https://github.com/monarch-initiative/phenopacket2prompt.git - cd phenopacket2prompt - maven package - java -jar target/phenopacket2prompt.jar - -Setup -^^^^^ - -First download the latest copy of the `Human Phenotype Ontology `_ hp.json file. This file is -used for text mining of clinical signs and symptoms. For more information about the HPO, see -`Koehler et al. (2021) `_. Adjust the path to the `phenopacket2prompt.jar` -file as necessary. - -.. code-block:: shell - :linenos: - - java -jar phenopacket2prompt.jar download - - -Preparing the case report files -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -TODO describe the Python script or add it to this repo - - -Running phenopacket2prompt -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Assuming the hp.json file has been downloaded as described above and all of the case report text files -are available in a directory at ``some/path/gptdocs``, run - - -.. code-block:: shell - :linenos: - - java -jar phenopacket2prompt.jar gpt -g some/path/gptdocs - -This command will create a new directory called ``gptOut`` (this can be adjusted using the -o option). -It will contain four subdirectories - -1. phenopackets. GA4GH phenopackets derived from each case report -2. phenopacket_based_queries. 
Feature-based query prompts for GPT-4 based on the information in the phenopackets -3. txt_without_discussion. Original query based on the original case report with text as presented by the first discussant up to but not including text contributed by the second discussant or any following text -4. txt_with_differential. Text that starts with the presentation by the first discussant up to and including the differential. This was used to check parsing but was not used in our analysis. - - -.. toctree:: - :caption: contents - :name: phenopacket2prompt - :maxdepth: 1 - - cases/PMID_33471980.rst - authors - developers - history - LICENSE - - - --------- -Feedback --------- - -The best place to leave feedback, ask questions, and report bugs is the `WN2vec Issue Tracker `_. - diff --git a/docs/languages.md b/docs/languages.md new file mode 100644 index 0000000..06342ec --- /dev/null +++ b/docs/languages.md @@ -0,0 +1,46 @@ +# Languages + +phenopacket2prompt creates phenopackets in various languages using a template system. +(TODO explain HPO translations). + + +## The template + +Phenopackets are generated according to the same scheme (template) in all languages. The following example explains the template in English. + + +The template contains a series of components that can be translated in isolation. + +### Header + +This is the explanation for the start of the prompt for GPT. + +:bulb: +I am running an experiment on a clinicopathological case conference to see how your diagnoses +compare with those of human experts. I am going to give you part of a medical case. ... After you read the case, +I want you to give two pieces of information. The first piece of information is your most likely +diagnosis/diagnoses. You need to be as specific as possible -- the goal is to get the correct +answer, not a broad category of answers. +Do you have any questions, Dr. GPT-4? 
+ + +### Describe the individual + +In this section, we present the age and sex of the proband (patient or individual) and the symptoms with which they presented. Each individual can have an age of onset and an age at last examination. Either age may be missing, so the template covers the following four cases. + +1. Age of onset and age at last examination available +The proband was a 39-year-old woman who presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + +2. Age at last examination available but age of onset not available +The proband was a 39-year-old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + +3. Age at last examination not available but age of onset available +The proband presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + +4. No age information available +The proband presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + +### Describe findings at other specified ages +Some of the phenopackets have multiple ages at which specific features were first observed. Each of these is written as follows. + +At the age of 42 years, he/she/the individual presented with HPO1, HPO2, and HPO3, and HPO4 and HPO5 were excluded. 
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..b889f6d --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,65 @@ +site_name: phenopacket2prompt + +theme: + name: "material" + features: + - navigation.tabs + - navigation.sections + - toc.integrate + - navigation.top + - search.suggest + - search.highlight + - content.tabs.link + - content.code.annotation + - content.code.copy + - content.code.select + language: en + palette: + - scheme: default + toggle: + icon: material/toggle-switch-off-outline + name: Switch to dark mode + primary: indigo + accent: white + - scheme: slate + toggle: + icon: material/toggle-switch + name: Switch to light mode + primary: teal + accent: lime + + +nav: + - Home: 'index.md' + - Languages: + - "Template": 'languages.md' + - "English": "english.md" + +plugins: + - search + + + + +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets + - admonition + - pymdownx.arithmatex: + generic: true + - footnotes + - pymdownx.details + - pymdownx.superfences + - pymdownx.mark + - attr_list + - md_in_html + - tables + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:materialx.emoji.to_svg + +copyright: | + © 2023-2024 Peter N Robinson + diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/IndividualInformation.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/IndividualInformation.java new file mode 100644 index 0000000..1398043 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/IndividualInformation.java @@ -0,0 +1,13 @@ +package org.monarchinitiative.phenopacket2prompt.output; + +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; + +public record IndividualInformation(PhenopacketSex psex, + String ageSexAtLastExam) { + + + + + + +} diff --git 
a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeGenerator.java deleted file mode 100644 index 1a01c97..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeGenerator.java +++ /dev/null @@ -1,12 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.output; - -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; - -public interface PhenopacketAgeGenerator { - - String age(PhenopacketAge ppktAge); - - String atAge(PhenopacketAge ppktAge); - - -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeSexGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeSexGenerator.java new file mode 100644 index 0000000..7da409e --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketAgeSexGenerator.java @@ -0,0 +1,16 @@ +package org.monarchinitiative.phenopacket2prompt.output; + +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; + +public interface PhenopacketAgeSexGenerator { + + String individualWithAge(PhenopacketAge ppktAge); + + String atAge(PhenopacketAge ppktAge); + + String ppktSex(); + + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketSexGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketSexGenerator.java deleted file mode 100644 index 0662d70..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketSexGenerator.java +++ /dev/null @@ -1,11 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.output; - -import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; - -public interface PhenopacketSexGenerator { - - - String ppktSex(PpktIndividual individual); - - -} diff 
--git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index fedea76..82a0510 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -4,10 +4,6 @@ import org.monarchinitiative.phenopacket2prompt.international.HpInternational; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.impl.english.EnglishPromptGenerator; -import org.monarchinitiative.phenopacket2prompt.output.impl.english.PhenopacketSexEnglish; -import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktAgeEnglish; -import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktPhenotypicfeatureEnglish; -import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktTextEnglish; import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.*; public interface PromptGenerator { @@ -22,19 +18,13 @@ public interface PromptGenerator { public static PromptGenerator english(Ontology ontology){ - PhenopacketSexGenerator sgen = new PhenopacketSexEnglish(); - PhenopacketAgeGenerator page = new PpktAgeEnglish(); - PhenopacketTextGenerator ptext = new PpktTextEnglish(); - PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureEnglish(); - return new EnglishPromptGenerator(ontology, sgen, page, ptext, pfgen); + + return new EnglishPromptGenerator(ontology); } static PromptGenerator spanish(Ontology hpo, HpInternational international) { - PhenopacketSexGenerator sgen = new PhenopacketSexSpanish(); - PhenopacketAgeGenerator page = new PpktAgeSpanish(); - PhenopacketTextGenerator ptext = new PpktTextSpanish(); PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureSpanish(international); - return new SpanishPromptGenerator(hpo, sgen, page, ptext, 
pfgen); + return new SpanishPromptGenerator(hpo, pfgen); } default String createPrompt(PpktIndividual individual) { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java index e8c1cf6..d162c0f 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java @@ -12,9 +12,8 @@ public class EnglishPromptGenerator implements PromptGenerator { private final Ontology hpo; - private final PhenopacketSexGenerator sexGenerator; - private final PhenopacketAgeGenerator ppktAgeGenerator; + private final PhenopacketAgeSexGenerator ppktAgeGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -22,12 +21,11 @@ public class EnglishPromptGenerator implements PromptGenerator { - public EnglishPromptGenerator(Ontology hpo, PhenopacketSexGenerator sgen, PhenopacketAgeGenerator page, PhenopacketTextGenerator ptext, PpktPhenotypicFeatureGenerator pfgen){ + public EnglishPromptGenerator(Ontology hpo){ this.hpo = hpo; - sexGenerator = sgen; - ppktAgeGenerator = page; - ppktTextGenerator = ptext; - this.ppktPhenotypicFeatureGenerator = pfgen; + ppktAgeGenerator = new PpktAgeSexEnglish(); + ppktTextGenerator = new PpktTextEnglish(); + this.ppktPhenotypicFeatureGenerator = new PpktPhenotypicfeatureEnglish(); } @@ -36,7 +34,7 @@ public EnglishPromptGenerator(Ontology hpo, PhenopacketSexGenerator sgen, Phenop @Override public String getIndividualInformation(PpktIndividual ppktIndividual) { StringBuilder sb = new StringBuilder(); - String sex = sexGenerator.ppktSex(ppktIndividual); + /*String sex = sexGenerator.ppktSex(ppktIndividual); Optional lastAgeOpt = ppktIndividual.getAgeAtLastExamination(); Optional onsetOpt = ppktIndividual.getAgeAtOnset(); if 
(lastAgeOpt.isPresent()) { @@ -50,7 +48,7 @@ public String getIndividualInformation(PpktIndividual ppktIndividual) { PhenopacketAge onsetAge = onsetOpt.get(); String onset = ppktAgeGenerator.age(onsetAge); sb.append("Initial manifestations of disease appeared when the proband was ").append(onset).append(". "); - } + }*/ return sb.toString(); } @@ -80,7 +78,7 @@ public String getPhenotypicFeatures(PpktIndividual ppktIndividual) { } } } else { - String ageString = ppktAgeGenerator.age(age); + String ageString = "";//ppktAgeGenerator.age(age); if (ppktPhenotypicFeatureGenerator.hasObservedFeatures(terms)) { sb.append(ageString).append(", the following clinical manifestations were observed: ").append(ppktPhenotypicFeatureGenerator.featureList(terms)).append(". "); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PhenopacketSexEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PhenopacketSexEnglish.java deleted file mode 100644 index 523b3f5..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PhenopacketSexEnglish.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.output.impl.english; - -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; -import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketSexGenerator; - -import java.util.Optional; - -public class PhenopacketSexEnglish implements PhenopacketSexGenerator { - @Override - public String ppktSex(PpktIndividual individual) { - PhenopacketSex psex = individual.getSex(); - Optional ageOpt = individual.getAgeAtLastExamination(); - if (ageOpt.isEmpty()) { - ageOpt = individual.getAgeAtOnset(); - } - if (ageOpt.isEmpty()) { - return switch (psex) { - case FEMALE -> "female"; - case MALE -> "male"; - default -> 
"individual"; - }; - } - PhenopacketAge age = ageOpt.get();; - if (age.isChild()) { - return switch (psex) { - case FEMALE -> "girl"; - case MALE -> "boy"; - default -> "child"; - }; - } else if (age.isCongenital()) { - return switch (psex) { - case FEMALE -> "female newborn"; - case MALE -> "male newborn"; - default -> "newborn"; - }; - } else if (age.isFetus()) { - return switch (psex) { - case FEMALE -> "female fetus"; - case MALE -> "male fetus"; - default -> "fetus"; - }; - } else if (age.isInfant()) { - return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; - }; - } else { - return switch (psex) { - case FEMALE -> "woman"; - case MALE -> "man"; - default -> "individual"; - }; - } - } -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeEnglish.java deleted file mode 100644 index a3bd78c..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeEnglish.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.output.impl.english; - -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAgeType; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketAgeGenerator; - -public class PpktAgeEnglish implements PhenopacketAgeGenerator { - @Override - public String age(PhenopacketAge ppktAge) { - if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { - return ppktAge.age() + " old"; - } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { - String label = ppktAge.age(); // something like "Infantile onset" - return switch (label) { - case "Infantile onset" -> "infant"; - case "Childhood onset" -> "child"; - case "Neonatal onset" -> "neonate"; - case "Congenital onset" -> "born"; - 
case "Adult onset" -> "adult"; - default-> String.format("During the %s", label.replace(" onset", "")); - }; - } else { - return ""; // should never get here - } - } - - @Override - public String atAge(PhenopacketAge ppktAge) { - if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { - return "At an age of " + ppktAge.age(); - } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { - String label = ppktAge.age(); // something like "Infantile onset" - return switch (label) { - case "Infantile onset" -> "During the infantile period"; - case "Childhood onset" -> "During childhood"; - case "Neonatal onset" -> "During the neonatal period"; - case "Congenital onset" -> "At birth"; - case "Adult onset" -> "As an adult"; - default-> String.format("During the %s", label.replace(" onset", "")); - }; - } else { - return ""; // should never get here - } - } -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeSexEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeSexEnglish.java new file mode 100644 index 0000000..6363c3f --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktAgeSexEnglish.java @@ -0,0 +1,110 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAgeType; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketAgeSexGenerator; + +import java.util.Optional; + +public class PpktAgeSexEnglish implements PhenopacketAgeSexGenerator { + + public PpktAgeSexEnglish() { + + } + + + + private String individualName(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional 
ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "female"; + case MALE -> "male"; + default -> "individual"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "girl"; + case MALE -> "boy"; + default -> "child"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "female newborn"; + case MALE -> "male newborn"; + default -> "newborn"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "female fetus"; + case MALE -> "male fetus"; + default -> "fetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "female infant"; + case MALE -> "male infant"; + default -> "infant"; + }; + } else { + return switch (psex) { + case FEMALE -> "woman"; + case MALE -> "man"; + default -> "individual"; + }; + } + } + + + + @Override + public String individualWithAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return ppktAge.age() + " old"; + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "infant"; + case "Childhood onset" -> "child"; + case "Neonatal onset" -> "neonate"; + case "Congenital onset" -> "born"; + case "Adult onset" -> "adult"; + default-> String.format("During the %s", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return "At an age of " + ppktAge.age(); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch 
(label) { + case "Infantile onset" -> "During the infantile period"; + case "Childhood onset" -> "During childhood"; + case "Neonatal onset" -> "During the neonatal period"; + case "Congenital onset" -> "At birth"; + case "Adult onset" -> "As an adult"; + default-> String.format("During the %s", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + @Override + public String ppktSex() { + return ""; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PhenopacketSexSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PhenopacketSexSpanish.java deleted file mode 100644 index 6a3298e..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PhenopacketSexSpanish.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; - -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; -import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketSexGenerator; - -import java.util.Optional; - -public class PhenopacketSexSpanish implements PhenopacketSexGenerator { - @Override - public String ppktSex(PpktIndividual individual) { - PhenopacketSex psex = individual.getSex(); - Optional ageOpt = individual.getAgeAtLastExamination(); - if (ageOpt.isEmpty()) { - ageOpt = individual.getAgeAtOnset(); - } - if (ageOpt.isEmpty()) { - return switch (psex) { - case FEMALE -> "female"; - case MALE -> "male"; - default -> "individual"; - }; - } - PhenopacketAge age = ageOpt.get();; - if (age.isChild()) { - return switch (psex) { - case FEMALE -> "girl"; - case MALE -> "boy"; - default -> "child"; - }; - } else if (age.isCongenital()) { - return switch (psex) { - case FEMALE -> "female newborn"; - case MALE -> "male newborn"; - default 
-> "newborn"; - }; - } else if (age.isFetus()) { - return switch (psex) { - case FEMALE -> "female fetus"; - case MALE -> "male fetus"; - default -> "fetus"; - }; - } else if (age.isInfant()) { - return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; - }; - } else { - return switch (psex) { - case FEMALE -> "woman"; - case MALE -> "man"; - default -> "individual"; - }; - } - } -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSexSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSexSpanish.java new file mode 100644 index 0000000..d931b32 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSexSpanish.java @@ -0,0 +1,277 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; + +import org.monarchinitiative.phenopacket2prompt.model.*; +import org.monarchinitiative.phenopacket2prompt.output.IndividualInformation; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketAgeSexGenerator; + +import java.util.Optional; + +public class PpktAgeSexSpanish implements PhenopacketAgeSexGenerator { + + + public PpktAgeSexSpanish() { + } + + + + public IndividualInformation getInformation(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + String ageSexAtLastExam = ageAndSexAtLastExamination(individual); + + + + return new IndividualInformation(psex, ageSexAtLastExam); + } + + + /** + * Equivalent of "The clinical + * @param individual + * @return + */ + public String ageAndSexAtOnset(PpktIndividual individual) { + Optional ageOpt = individual.getAgeAtOnset(); + return ""; + } + + + + + public String ageAndSexAtLastExamination(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + String 
sex; + switch (psex) { + case FEMALE -> sex = "una paciente femenina"; + case MALE -> sex = "un paciente masculino"; + default -> sex = "una persona"; + }; + + if (ageOpt.isEmpty()) { + return sex; + } + PhenopacketAge age = ageOpt.get(); + if (age.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoage = (Iso8601Age) age; + int y = isoage.getYears(); + int m = isoage.getMonths(); + int d = isoage.getDays(); + if (psex.equals(PhenopacketSex.FEMALE)) { + if (y > 17) { + return String.format("una mujer de %d años", y); + } else if (y > 9) { + return String.format("una adolescente de %d años", y); + + } else if (y > 0) { + return String.format("una niña de %d años", y); + } else if (m>0) { + return String.format("una bebe niña de %d meses", m); + } else { + return String.format("una recien nacida %d meses", d); + } + } + } else { + // age is an HPO onset term, we do not have an exact date + } + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "una niña"; + case MALE -> "un niño"; + default -> "un niño"; // difficult to be gender neutral + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "una recien nacida"; + case MALE -> "un recien nacido"; + default -> "un recien nacido"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "un feto femenino"; + case MALE -> "un feto masculino"; + default -> "un feto"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "un bebé femenino"; + case MALE -> "un bebé masculino"; + default -> "un bebé"; + }; + } else { + return switch (psex) { + case FEMALE -> "un mujer"; + case MALE -> "un hombre"; + default -> "una persona adulta"; + }; + } + } + + + private String individualName(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch 
(psex) { + case FEMALE -> "female"; + case MALE -> "male"; + default -> "individual"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "girl"; + case MALE -> "boy"; + default -> "child"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "female newborn"; + case MALE -> "male newborn"; + default -> "newborn"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "female fetus"; + case MALE -> "male fetus"; + default -> "fetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "female infant"; + case MALE -> "male infant"; + default -> "infant"; + }; + } else { + return switch (psex) { + case FEMALE -> "woman"; + case MALE -> "man"; + default -> "individual"; + }; + } + } + + + @Override + public String individualWithAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return ppktAge.age() + " old"; + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "bebé"; + case "Childhood onset" -> "niño"; + case "Neonatal onset" -> "neonate"; + case "Congenital onset" -> "recién nacido"; + case "Adult onset" -> "adulto"; + default-> String.format("During the %s", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + + private String atIsoAgeExact(PhenopacketAge ppktAge) { + Iso8601Age iso8601Age = (Iso8601Age) ppktAge; + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + + if (y > 10) { + return String.format("%d años", y); + } else if (y > 0) { + if (m > 1) { + return String.format("%d años y %d meses", y, m); + } else if (m == 1) { + return String.format("%d años y un mes", y); + } else { + return String.format("%d años", y); + } + } else if (m>0) 
{ + return String.format("%d meses y %d días", m, d); + } else { + return String.format("%d días", d); + } + } + + + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return "A la edad de " + atIsoAgeExact(ppktAge); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "Durante el periodo infantil"; + case "Childhood onset" -> "Durante la infancia"; + case "Neonatal onset" -> "Durante el periodo neonatal"; + case "Congenital onset" -> "Al nacer"; + case "Adult onset" -> "Como adulto"; + default-> String.format("Durante el %s periodo", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + // @Override + public String ppktSex(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "female"; + case MALE -> "male"; + default -> "individual"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "girl"; + case MALE -> "boy"; + default -> "child"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "female newborn"; + case MALE -> "male newborn"; + default -> "newborn"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "female fetus"; + case MALE -> "male fetus"; + default -> "fetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "female infant"; + case MALE -> "male infant"; + default -> "infant"; + }; + } else { + return switch (psex) { + case FEMALE -> "woman"; + case MALE -> "man"; + default -> "individual"; + }; + } + } + + @Override + public 
String ppktSex() { + return ""; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSpanish.java deleted file mode 100644 index 9e64106..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktAgeSpanish.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; - -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAgeType; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketAgeGenerator; - -public class PpktAgeSpanish implements PhenopacketAgeGenerator { - @Override - public String age(PhenopacketAge ppktAge) { - if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { - return ppktAge.age() + " old"; - } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { - String label = ppktAge.age(); // something like "Infantile onset" - return switch (label) { - case "Infantile onset" -> "bebé"; - case "Childhood onset" -> "niño"; - case "Neonatal onset" -> "neonate"; - case "Congenital onset" -> "recién nacido"; - case "Adult onset" -> "adulto"; - default-> String.format("During the %s", label.replace(" onset", "")); - }; - } else { - return ""; // should never get here - } - } - - @Override - public String atAge(PhenopacketAge ppktAge) { - if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { - return "A la edad de " + ppktAge.age(); - } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { - String label = ppktAge.age(); // something like "Infantile onset" - return switch (label) { - case "Infantile onset" -> "Durante el periodo infantil"; - case "Childhood onset" -> "Durante la infancia"; - case "Neonatal onset" -> "Durante el periodo neonatal"; - case "Congenital onset" -> 
"Al nacer"; - case "Adult onset" -> "Como adulto"; - default-> String.format("Durante el %s periodo", label.replace(" onset", "")); - }; - } else { - return ""; // should never get here - } - } -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java index 0c99471..1d4a053 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java @@ -12,9 +12,8 @@ public class SpanishPromptGenerator implements PromptGenerator { private final Ontology hpo; - private final PhenopacketSexGenerator sexGenerator; - private final PhenopacketAgeGenerator ppktAgeGenerator; + private final PhenopacketAgeSexGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -22,11 +21,10 @@ public class SpanishPromptGenerator implements PromptGenerator { - public SpanishPromptGenerator(Ontology hpo, PhenopacketSexGenerator sgen, PhenopacketAgeGenerator page, PhenopacketTextGenerator ptext, PpktPhenotypicFeatureGenerator pfgen) { + public SpanishPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { this.hpo = hpo; - sexGenerator = sgen; - ppktAgeGenerator = page; - ppktTextGenerator = ptext; + ppktAgeSexGenerator = new PpktAgeSexSpanish(); + ppktTextGenerator = new PpktTextSpanish(); this.ppktPhenotypicFeatureGenerator = pfgen; } @@ -38,21 +36,21 @@ public String queryHeader() { @Override public String getIndividualInformation(PpktIndividual ppktIndividual) { StringBuilder sb = new StringBuilder(); - String sex = sexGenerator.ppktSex(ppktIndividual); + /* String sex = sexGenerator.ppktSex(ppktIndividual); Optional lastAgeOpt = ppktIndividual.getAgeAtLastExamination(); Optional onsetOpt = ppktIndividual.getAgeAtOnset(); if 
(lastAgeOpt.isPresent()) { PhenopacketAge lastExamAge = lastAgeOpt.get(); - String examAge = ppktAgeGenerator.age(lastExamAge); + String examAge = ppktAgeSexGenerator.age(lastExamAge); sb.append("El probando era un ").append(examAge).append( " ").append(sex).append(". "); } else { sb.append("El probando era un ").append(sex).append(". "); } if (onsetOpt.isPresent()) { PhenopacketAge onsetAge = onsetOpt.get(); - String onset = ppktAgeGenerator.age(onsetAge); + String onset = ppktAgeSexGenerator.age(onsetAge); sb.append("Las manifestaciones iniciales de la enfermedad aparecieron cuando el probando era ").append(onset).append(". "); - } + }*/ return sb.toString(); } @@ -82,7 +80,7 @@ public String getPhenotypicFeatures(PpktIndividual ppktIndividual) { } } } else { - String ageString = ppktAgeGenerator.age(age); + String ageString = "";//ppktAgeSexGenerator.age(age); if (ppktPhenotypicFeatureGenerator.hasObservedFeatures(terms)) { sb.append(ageString).append(", se observaron las siguientes manifestaciones clínicas: ").append(ppktPhenotypicFeatureGenerator.featureList(terms)).append(". ");