Skip to content

Commit

Permalink
Merge branch 'develop' into nl-translation
Browse files Browse the repository at this point in the history
  • Loading branch information
KyranWissink authored May 17, 2024
2 parents 21bc283 + 96f2b6b commit a226a60
Show file tree
Hide file tree
Showing 11 changed files with 717 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,13 @@ public Integer call() throws Exception {
// output all non-English languages here
PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es"));
outputPromptsInternational(ppktFiles, hpo, "es", spanish);

PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl"));
outputPromptsInternational(ppktFiles, hpo, "nl", dutch);

PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de"));
outputPromptsInternational(ppktFiles, hpo, "de", german);

// output file with correct diagnosis list
outputCorrectResults(correctResultList);
return 0;
Expand All @@ -85,8 +90,8 @@ private void outputCorrectResults(List<CorrectResult> correctResultList) {
}


private String getFileName(String phenopacketID) {
return phenopacketID.replaceAll("[^\\w]", phenopacketID).replaceAll("/","_") + "-prompt.txt";
/**
 * Builds the name of the prompt file for one phenopacket in one language.
 * Every character of the identifier that is not a regex word character
 * is replaced by an underscore before the language code and the fixed
 * {@code -prompt.txt} suffix are appended.
 *
 * @param phenopacketID identifier of the phenopacket
 * @param languageCode  language code inserted before the suffix (e.g. "en")
 * @return file name of the form {@code <sanitized id>_<languageCode>-prompt.txt}
 */
private String getFileName(String phenopacketID, String languageCode) {
    final String sanitizedId = phenopacketID.replaceAll("[^\\w]", "_");
    StringBuilder fileName = new StringBuilder(sanitizedId);
    fileName.append("_");
    fileName.append(languageCode);
    fileName.append("-prompt.txt");
    return fileName.toString();
}


Expand All @@ -99,11 +104,11 @@ private void outputPromptsInternational(List<File> ppktFiles, Ontology hpo, Stri
PpktIndividual individual = new PpktIndividual(f);
List<PhenopacketDisease> diseaseList = individual.getDiseases();
if (diseaseList.size() != 1) {
System.err.println(String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()));
System.err.printf("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId());
continue;
}
PhenopacketDisease pdisease = diseaseList.get(0);
String promptFileName = getFileName( individual.getPhenopacketId());
String promptFileName = getFileName( individual.getPhenopacketId(), languageCode);
String diagnosisLine = String.format("%s\t%s\t%s\t%s", pdisease.getDiseaseId(), pdisease.getLabel(), promptFileName, f.getAbsolutePath());
try {
diagnosisList.add(diagnosisLine);
Expand All @@ -125,11 +130,11 @@ private List<CorrectResult> outputPromptsEnglish(List<File> ppktFiles, Ontology
PpktIndividual individual = new PpktIndividual(f);
List<PhenopacketDisease> diseaseList = individual.getDiseases();
if (diseaseList.size() != 1) {
System.err.println(String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()));
System.err.printf("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId());
continue;
}
PhenopacketDisease pdisease = diseaseList.get(0);
String promptFileName = getFileName( individual.getPhenopacketId());
String promptFileName = getFileName( individual.getPhenopacketId(), "en");
String diagnosisLine = String.format("%s\t%s\t%s\t%s", pdisease.getDiseaseId(), pdisease.getLabel(), promptFileName, f.getAbsolutePath());
try {
String prompt = generator.createPrompt(individual);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,17 @@ public Integer call() throws Exception {
PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es"));
prompt = spanish.createPrompt(individual);
System.out.println(prompt);

// DUTCH
System.out.println("DUTCH");
PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl"));
prompt = dutch.createPrompt(individual);

// GERMAN
System.out.println("GERMAN");
PromptGenerator german = PromptGenerator.spanish(hpo, internationalMap.get("de"));
prompt = german.createPrompt(individual);

System.out.println(prompt);

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public Optional<String> getTranslation(String annots) {
public HpInternationalOboParser(File file) {
languageToInternationalMap = new HashMap<>();
String pattern = "id: (HP:\\d{7,7})";
Set<String> acronyms = Set.of("cs", "en", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh");
Set<String> acronyms = Set.of("cs", "en", "de", "it", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh");
for (String acronym : acronyms) {
languageToInternationalMap.put(acronym, new HpInternational(acronym));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex;
import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual;
import org.monarchinitiative.phenopacket2prompt.output.impl.english.EnglishPromptGenerator;
import org.monarchinitiative.phenopacket2prompt.output.impl.german.GermanPromptGenerator;
import org.monarchinitiative.phenopacket2prompt.output.impl.german.PpktPhenotypicfeatureGerman;
import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.*;
import org.monarchinitiative.phenopacket2prompt.output.impl.dutch.*;

Expand Down Expand Up @@ -34,10 +36,18 @@ static PromptGenerator spanish(Ontology hpo, HpInternational international) {
return new SpanishPromptGenerator(hpo, pfgen);
}


/**
 * Creates the prompt generator for Dutch output.
 *
 * @param hpo           ontology handed to the {@code DutchPromptGenerator} constructor
 * @param international object handed to the {@code PpktPhenotypicfeatureDutch} constructor
 * @return a new {@code DutchPromptGenerator} using a {@code PpktPhenotypicfeatureDutch}
 */
static PromptGenerator dutch(Ontology hpo, HpInternational international) {
    final PpktPhenotypicFeatureGenerator dutchFeatureGenerator =
            new PpktPhenotypicfeatureDutch(international);
    final PromptGenerator generator = new DutchPromptGenerator(hpo, dutchFeatureGenerator);
    return generator;
}

/**
 * Creates the prompt generator for German output.
 *
 * @param hpo           ontology handed to the {@code GermanPromptGenerator} constructor
 * @param international object handed to the {@code PpktPhenotypicfeatureGerman} constructor
 * @return a new {@code GermanPromptGenerator} using a {@code PpktPhenotypicfeatureGerman}
 */
static PromptGenerator german(Ontology hpo, HpInternational international) {
    final PpktPhenotypicFeatureGenerator germanFeatureGenerator =
            new PpktPhenotypicfeatureGerman(international);
    final PromptGenerator generator = new GermanPromptGenerator(hpo, germanFeatureGenerator);
    return generator;
}


/**
* The following structure should work for most other languages, but the function
* can be overridden if necessary.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ public class PpktTextEnglish implements PhenopacketTextGenerator {
@Override
public String QUERY_HEADER() {
return """
I am running an experiment on a clinical case report to see how your diagnoses compare with those of human experts. I am going to give you part of a medical case. You are not trying to treat any patients. In this case, you are “Dr. GPT-4,” an AI language model who is providing a diagnosis Here are some guidelines. First, there is a single definitive diagnosis, and it is a diagnosis that is known today to exist in humans. The diagnosis is almost always confirmed by some sort of genetic test, though in rare cases when such a test does not exist for a diagnosis the diagnosis can instead be made using validated clinical criteria or very rarely just confirmed by expert opinion. After you read the case, I want you to give a differential diagnosis with a list of candidate diagnoses ranked by probability starting with the most likely candidate. Each candidate should be specified with the OMIM identifier and disease name. For instance, if the first candidate is Branchiooculofacial syndrome and the second is Cystic fibrosis, provide this:
I am running an experiment on a clinical case report to see how your diagnoses compare with those of human experts. I am going to give you part of a medical case. You are not trying to treat any patients. In this case, you are “Dr. GPT-4”, an AI language model who is providing a diagnosis. Here are some guidelines. First, there is a single definitive diagnosis, and it is a diagnosis that is known today to exist in humans. The diagnosis is almost always confirmed by some sort of genetic test, though in rare cases when such a test does not exist for a diagnosis the diagnosis can instead be made using validated clinical criteria or very rarely just confirmed by expert opinion. After you read the case, I want you to give a differential diagnosis with a list of candidate diagnoses ranked by probability starting with the most likely candidate. Each candidate should be specified with disease name. For instance, if the first candidate is Branchiooculofacial syndrome and the second is Cystic fibrosis, provide this:
1. OMIM:113620 - Branchiooculofacial syndrome
2. OMIM:219700 - Cystic fibrosis
1. Branchiooculofacial syndrome
2. Cystic fibrosis
This list should provide as many diagnoses as you think are reasonable.
You do not need to explain your reasoning, just list the diagnoses together with the OMIM identifiers.
You do not need to explain your reasoning, just list the diagnoses.
Here is the case:
""";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.monarchinitiative.phenopacket2prompt.output.impl.german;

import org.monarchinitiative.phenol.ontology.data.Ontology;
import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm;
import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge;
import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex;
import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual;
import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator;
import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator;
import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator;
import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator;

import java.util.List;

/**
 * {@link PromptGenerator} implementation that assembles prompts from the
 * German component generators ({@link PpktIndividualGerman},
 * {@link PpktTextGerman}) and a caller-supplied phenotypic-feature generator.
 */
public class GermanPromptGenerator implements PromptGenerator {

    /** Ontology supplied at construction; stored but not read by the methods in this class. */
    private final Ontology hpo;

    /** Produces the individual description, age phrases and pronouns. */
    private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator;

    /** Supplies the fixed prompt text such as the query header. */
    private final PhenopacketTextGenerator ppktTextGenerator;

    /** Formats lists of ontology terms. */
    private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator;

    /**
     * @param hpo   ontology to store
     * @param pfgen generator used to format phenotypic features
     */
    public GermanPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) {
        this.hpo = hpo;
        ppktAgeSexGenerator = new PpktIndividualGerman();
        ppktTextGenerator = new PpktTextGerman();
        this.ppktPhenotypicFeatureGenerator = pfgen;
    }

    /**
     * @return the query header provided by the text generator
     */
    @Override
    public String queryHeader() {
        return ppktTextGenerator.QUERY_HEADER();
    }

    /**
     * @param ppktIndividual individual to describe
     * @return the description produced by the individual-information generator
     */
    @Override
    public String getIndividualInformation(PpktIndividual ppktIndividual) {
        return this.ppktAgeSexGenerator.getIndividualDescription(ppktIndividual);
    }

    /**
     * @param ontologyTerms terms to format
     * @return the terms as formatted by the phenotypic-feature generator
     */
    @Override
    public String formatFeatures(List<OntologyTerm> ontologyTerms) {
        return ppktPhenotypicFeatureGenerator.formatFeatures(ontologyTerms);
    }

    /**
     * Builds one vignette sentence: age phrase, verb, pronoun, features.
     *
     * @param page  age at which the features were observed
     * @param psex  sex used to select the pronoun
     * @param terms features observed at that age
     * @return the formatted sentence
     */
    @Override
    public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List<OntologyTerm> terms) {
        String ageString = this.ppktAgeSexGenerator.atAge(page);
        String pronoun = ppktAgeSexGenerator.heSheIndividual(psex);
        String features = formatFeatures(terms);
        // The previous template used the Spanish verb "presentó". German needs its own verb,
        // placed second after a fronted adverbial and before the subject.
        // NOTE(review): assumes atAge() returns an adverbial phrase such as "Im Alter von ..." —
        // confirm against PpktIndividualGerman.
        return String.format("%s zeigte %s %s", ageString, pronoun, features);
    }

}
Loading

0 comments on commit a226a60

Please sign in to comment.