diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventQueries.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventQueries.java index 15ea2d65..0879f571 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventQueries.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventQueries.java @@ -68,10 +68,10 @@ public static BoolQuery getClosedQuery(GepiRequestData requestData, Set BoolQuery filterQuery = new BoolQuery(); BoolClause.Occur sentenceParagraphOccur = requestData.getFilterFieldsConnectionOperator().equalsIgnoreCase("and") ? MUST : SHOULD; if (!StringUtils.isBlank(requestData.getSentenceFilterString())) { - addFulltextSearchQuery(requestData.getSentenceFilterString(), FIELD_EVENT_SENTENCE, sentenceParagraphOccur, filterQuery); + addFulltextSearchQuery(requestData.getSentenceFilterString(), FIELD_EVENT_SENTENCE_TEXT, sentenceParagraphOccur, filterQuery); } if (!StringUtils.isBlank(requestData.getParagraphFilterString())) { - addFulltextSearchQuery(requestData.getParagraphFilterString(), FIELD_EVENT_PARAGRAPH, sentenceParagraphOccur, filterQuery); + addFulltextSearchQuery(requestData.getParagraphFilterString(), FIELD_EVENT_PARAGRAPH_TEXT, sentenceParagraphOccur, filterQuery); } if (!StringUtils.isBlank(requestData.getSectionNameFilterString())) { addFulltextSearchQuery(requestData.getSectionNameFilterString(), FIELD_PARAGRAPH_HEADINGS, FILTER, eventQuery); @@ -113,10 +113,10 @@ public static BoolQuery getOpenQuery(GepiRequestData requestData) throws Interru BoolQuery filterQuery = new BoolQuery(); BoolClause.Occur sentenceParagraphOccur = requestData.getFilterFieldsConnectionOperator().equalsIgnoreCase("and") ? 
MUST : SHOULD; if (!StringUtils.isBlank(sentenceFilter)) { - addFulltextSearchQuery(sentenceFilter, FIELD_EVENT_SENTENCE, sentenceParagraphOccur, filterQuery); + addFulltextSearchQuery(sentenceFilter, FIELD_EVENT_SENTENCE_TEXT, sentenceParagraphOccur, filterQuery); } if (!StringUtils.isBlank(paragraphFilter)) { - addFulltextSearchQuery(paragraphFilter, FIELD_EVENT_PARAGRAPH, sentenceParagraphOccur, filterQuery); + addFulltextSearchQuery(paragraphFilter, FIELD_EVENT_PARAGRAPH_TEXT, sentenceParagraphOccur, filterQuery); } if (!StringUtils.isBlank(sectionNameFilter)) { addFulltextSearchQuery(sectionNameFilter, FIELD_PARAGRAPH_HEADINGS, FILTER, eventQuery); @@ -147,10 +147,10 @@ public static BoolQuery getFulltextQuery(List eventTypes, String sentenc BoolQuery fulltextQuery = new BoolQuery(); BoolClause.Occur filterFieldsOccur = filterFieldsConnectionOperator.equalsIgnoreCase("and") ? MUST : BoolClause.Occur.SHOULD; if (!StringUtils.isBlank(sentenceFilter)) { - addFulltextSearchQuery(sentenceFilter, FIELD_EVENT_SENTENCE, filterFieldsOccur, fulltextQuery); + addFulltextSearchQuery(sentenceFilter, FIELD_EVENT_SENTENCE_TEXT, filterFieldsOccur, fulltextQuery); } if (!StringUtils.isBlank(paragraphFilter)) { - addFulltextSearchQuery(paragraphFilter, FIELD_EVENT_PARAGRAPH, filterFieldsOccur, fulltextQuery); + addFulltextSearchQuery(paragraphFilter, FIELD_EVENT_PARAGRAPH_TEXT, filterFieldsOccur, fulltextQuery); } if (!StringUtils.isBlank(sectionNameFilter)) { addFulltextSearchQuery(sectionNameFilter, FIELD_PARAGRAPH_HEADINGS, filterFieldsOccur, eventQuery); diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventResponseProcessingService.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventResponseProcessingService.java index 5a2005a0..3bc60bea 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventResponseProcessingService.java +++ 
b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventResponseProcessingService.java @@ -11,9 +11,11 @@ import org.slf4j.Logger; import java.util.*; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; import static de.julielab.gepi.core.retrieval.services.EventRetrievalService.*; @@ -73,10 +75,14 @@ private Stream resultDocuments2Events(Stream docum List matchTypes = eventDocument.getFieldValues(FIELD_EVENT_ARG_MATCH_TYPES).orElse(Collections.emptyList()); Optional mainEventType = eventDocument.get(FIELD_EVENT_MAINEVENTTYPE); Optional likelihood = eventDocument.get(FIELD_EVENT_LIKELIHOOD); - Optional sentence = eventDocument.get(FIELD_EVENT_SENTENCE); - Optional paragraph = eventDocument.get(FIELD_EVENT_PARAGRAPH); - List sentenceHl = eventDocument.getHighlights().get(FIELD_EVENT_SENTENCE); - List paragraphHl = eventDocument.getHighlights().get(FIELD_EVENT_PARAGRAPH); + Optional sentence = eventDocument.get(FIELD_EVENT_SENTENCE_TEXT); + Optional paragraph = eventDocument.get(FIELD_EVENT_PARAGRAPH_TEXT); + List sentenceArgumentHl = eventDocument.getHighlights().get(FIELD_EVENT_SENTENCE_TEXT); + List sentenceTriggerHl = eventDocument.getHighlights().get(FIELD_EVENT_SENTENCE_TEXT_TRIGGER); + List sentenceFilterHl = eventDocument.getHighlights().get(FIELD_EVENT_SENTENCE_TEXT_FILTER); + List paragraphArgumentHl = eventDocument.getHighlights().get(FIELD_EVENT_PARAGRAPH_TEXT); + List paragraphTriggerHl = eventDocument.getHighlights().get(FIELD_EVENT_PARAGRAPH_TEXT_TRIGGER); + List paragraphFilterHl = eventDocument.getHighlights().get(FIELD_EVENT_PARAGRAPH_TEXT_FILTER); List geneMappingSources = eventDocument.getFieldValues(FIELD_GENE_MAPPING_SOURCE).orElse(Collections.emptyList()).stream().map(Object::toString).collect(Collectors.toList()); String eventId = eventDocument.getId(); @@ -115,12 +121,12 
@@ private Stream resultDocuments2Events(Stream docum if (mainEventType.isPresent()) event.setMainEventType(mainEventType.get()); event.setAllEventTypes(allEventTypes.stream().map(String.class::cast).collect(Collectors.toList())); - if (sentenceHl != null && !sentenceHl.isEmpty()) - event.setHlSentence(StringUtils.normalizeSpace(sentenceHl.get(0))); + if (sentenceArgumentHl != null && !sentenceArgumentHl.isEmpty()) + event.setHlSentence(StringUtils.normalizeSpace(sentenceArgumentHl.get(0))); if (sentence.isPresent()) event.setSentence(StringUtils.normalizeSpace(sentence.get())); - if (paragraphHl != null && !paragraphHl.isEmpty()) - event.setHlParagraph(StringUtils.normalizeSpace(paragraphHl.get(0))); + if (paragraphFilterHl != null && !paragraphFilterHl.isEmpty()) + event.setHlParagraph(StringUtils.normalizeSpace(paragraphFilterHl.get(0))); if (paragraph.isPresent()) event.setParagraph(StringUtils.normalizeSpace(paragraph.get())); for (int i = 0; i < event.getNumArguments(); i++) { @@ -134,18 +140,74 @@ private Stream resultDocuments2Events(Stream docum } if (event.getHlSentence() != null) { Matcher fulltextQueryHighlightedMatcher = FULLTEXT_QUERY_HIGHLIGHT_PATTERN.matcher(event.getHlSentence()); - if (fulltextQueryHighlightedMatcher.find()) - event.setSentenceMatchingFulltextQuery(true); +// if (fulltextQueryHighlightedMatcher.find()) +// event.setSentenceMatchingFulltextQuery(true); } if (event.getHlParagraph() != null) { Matcher fulltextQueryHighlightedMatcher = FULLTEXT_QUERY_HIGHLIGHT_PATTERN.matcher(event.getHlParagraph()); - if (fulltextQueryHighlightedMatcher.find()) - event.setParagraphMatchingFulltextQuery(true); +// if (fulltextQueryHighlightedMatcher.find()) +// event.setParagraphMatchingFulltextQuery(true); } + event.setSentenceMatchingFulltextQuery(sentenceFilterHl != null && !sentenceFilterHl.isEmpty()); + event.setParagraphMatchingFulltextQuery(paragraphFilterHl != null && !paragraphFilterHl.isEmpty()); 
event.setGeneMappingSources(geneMappingSources); return event; }).filter(Objects::nonNull); } + /** + *

Merges differently highlighted versions of the same text string, each marked up via HTML tags, into a single text string that carries all the highlight tags.

+ * + * @param highlights The different highlightings of the same text. + * @return The combined highlighted string or null if all input highlights were null. + */ + private String mergeHighlighting(String... highlights) { + Pattern tagPattern = Pattern.compile("<[^>]+>"); + // Build position-tag maps. This list will contain one position-tag map for each highlighted string + List> tagMaps = new ArrayList<>(); + for (String hl : highlights) { + if (hl == null) + continue; + final Matcher tagMatcher = tagPattern.matcher(hl); + // Sums up the encountered tag lengths. Thus, the start of a tag in the highlighted string minus the offset + // is the start of the tag without counting previous tags, hence, in the original string. + int offset = 0; + // This map stores the position of each tag in the original, non-highlighted string. + SortedMap pos2tag = new TreeMap<>(); + while (tagMatcher.find()) { + final int tagPos = tagMatcher.start(); + final String tag = tagMatcher.group(); + pos2tag.put(tagPos - offset, tag); + offset += tag.length(); + } + } + if (tagMaps.isEmpty()) + return null; + + // we will add the elements of the merged highlight string from end to start + List reversedMergedHighlight = new ArrayList<>(); + String nonHighlightedString = tagPattern.matcher(highlights[0]).replaceAll(""); + int lastPos = nonHighlightedString.length(); + int maxPosIndex = Integer.MIN_VALUE; + // Assemble the merged highlight string. In each iteration we determine the remaining tag with the largest + // offset and add the text between it and the previous tag and itself to the merged string. 
+ while (tagMaps.stream().anyMatch(Predicate.not(Map::isEmpty))) { + for (var tagMap : tagMaps) + maxPosIndex = Math.max(maxPosIndex, tagMap.lastKey()); + // the last tag as in the highest offset position of all tags of all highlights + final SortedMap lastTagMap = tagMaps.get(maxPosIndex); + int pos = lastTagMap.lastKey(); + String tag = lastTagMap.get(pos); + reversedMergedHighlight.add(nonHighlightedString.substring(pos + tag.length(), lastPos)); + reversedMergedHighlight.add(tag); + // Save the start position of this tag. For the next tag we will need it as the end point of the substring + // on the nonHighlightedString. + lastPos = pos; + // Remove this tag so that in the next iteration we get the preceeding one. + lastTagMap.remove(pos); + } + return IntStream.range(0, reversedMergedHighlight.size()).mapToObj(i -> reversedMergedHighlight.get(reversedMergedHighlight.size() - i)).collect(Collectors.joining()); + } + } diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventRetrievalService.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventRetrievalService.java index 5336c1fc..e9f51d76 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventRetrievalService.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventRetrievalService.java @@ -91,9 +91,17 @@ public class EventRetrievalService implements IEventRetrievalService { public static final String FIELD_EVENT_ARG2_HOMOLOGY_PREFERRED_NAME = "argument2homoprefname"; - public static final String FIELD_EVENT_SENTENCE = "sentence.text"; + public static final String FIELD_EVENT_SENTENCE_TEXT = "sentence.text"; - public static final String FIELD_EVENT_PARAGRAPH = "paragraph.text"; + public static final String FIELD_EVENT_PARAGRAPH_TEXT = "paragraph.text"; + + public static final String FIELD_EVENT_SENTENCE_TEXT_FILTER = "sentence.text_filter"; + + public static final String 
FIELD_EVENT_PARAGRAPH_TEXT_FILTER = "paragraph.text_filter"; + + public static final String FIELD_EVENT_SENTENCE_TEXT_TRIGGER = "sentence.text_trigger"; + + public static final String FIELD_EVENT_PARAGRAPH_TEXT_TRIGGER = "paragraph.text_trigger"; public static final String FIELD_PARAGRAPH_HEADINGS = "paragraph.headings"; @@ -104,7 +112,7 @@ public class EventRetrievalService implements IEventRetrievalService { FIELD_PMID, FIELD_PMCID, FIELD_EVENT_LIKELIHOOD, - FIELD_EVENT_SENTENCE, + FIELD_EVENT_SENTENCE_TEXT, FIELD_EVENT_MAINEVENTTYPE, FIELD_EVENT_ALL_EVENTTYPES, FIELD_EVENT_ARG_GENE_IDS, @@ -192,18 +200,7 @@ public CompletableFuture closedSearch(GepiRequestData requ if (!forCharts) serverRqst.addSortCommand("_doc", SortOrder.ASCENDING); if (!forCharts) { - HighlightCommand hlc = new HighlightCommand(); - hlc.addField(FIELD_EVENT_SENTENCE, 10, 0); - hlc.addField(FIELD_EVENT_PARAGRAPH, 10, 0); - hlc.fields.forEach(f -> { - f.pre = ""; - f.post = ""; - TermQuery tq = new TermQuery(); - tq.field = f.field; - tq.term = "xargumentx"; - f.highlightQuery = tq; - }); - serverRqst.addHighlightCmd(hlc); + addHighlighting(serverRqst); } ElasticSearchCarrier carrier = new ElasticSearchCarrier<>("BipartiteEvents"); @@ -324,20 +321,40 @@ public SearchServerRequest getOpenSearchRequest(GepiRequestData requestData, int if (!forCharts) serverRqst.addSortCommand("_doc", SortOrder.ASCENDING); if (!forCharts) { - HighlightCommand hlc = new HighlightCommand(); - hlc.addField(FIELD_EVENT_SENTENCE, 10, 0); - hlc.addField(FIELD_EVENT_PARAGRAPH, 10, 0); - hlc.fields.forEach(f -> { - f.pre = ""; - f.post = ""; + addHighlighting(serverRqst); + } + return serverRqst; + } + + private void addHighlighting(SearchServerRequest serverRqst) { + serverRqst.addHighlightCmd(getHighlightCommand("xargumentx", "hl-argument", FIELD_EVENT_SENTENCE_TEXT, FIELD_EVENT_PARAGRAPH_TEXT)); + serverRqst.addHighlightCmd(getHighlightCommand("xtriggerx", "hl-trigger", FIELD_EVENT_SENTENCE_TEXT_TRIGGER, 
FIELD_EVENT_PARAGRAPH_TEXT_TRIGGER)); + serverRqst.addHighlightCmd(getHighlightCommand(null, "hl-filter", FIELD_EVENT_SENTENCE_TEXT_FILTER, FIELD_EVENT_PARAGRAPH_TEXT_FILTER)); + } + + /** + *

Creates highlight commands required for GePI searches.

+ *

When hlTerm is not null, it is used in a TermQuery that is specified as the highlight query. This serves to highlight only special terms like event argument and event trigger words. The respective placeholder terms - xargumentx and xtriggerx - have been added in the RelationDocumentGenerator in the indexing code. If hlTerm is null, the actual query terms are highlighted.

+ * + * @param hlTerm + * @param hlClass + * @return + */ + private HighlightCommand getHighlightCommand(String hlTerm, String hlClass, String... hlFields) { + HighlightCommand hlc = new HighlightCommand(); + for (String hlField : hlFields) + hlc.addField(hlField, 1, 0); + hlc.fields.forEach(f -> { + f.pre = ""; + f.post = ""; + if (hlTerm != null) { TermQuery tq = new TermQuery(); tq.field = f.field; - tq.term = "xargumentx"; + tq.term = hlTerm; f.highlightQuery = tq; - }); - serverRqst.addHighlightCmd(hlc); - } - return serverRqst; + } + }); + return hlc; } @@ -360,18 +377,7 @@ public CompletableFuture getFulltextFilteredEvents(GepiReq if (!forCharts) serverRqst.addSortCommand("_doc", SortOrder.ASCENDING); if (!forCharts) { - HighlightCommand hlc = new HighlightCommand(); - hlc.addField(FIELD_EVENT_SENTENCE, 10, 0); - hlc.addField(FIELD_EVENT_PARAGRAPH, 10, 0); - hlc.fields.forEach(f -> { - f.pre = ""; - f.post = ""; - TermQuery tq = new TermQuery(); - tq.field = f.field; - tq.term = "xargumentx"; - f.highlightQuery = tq; - }); - serverRqst.addHighlightCmd(hlc); + addHighlighting(serverRqst); } ElasticSearchCarrier carrier = new ElasticSearchCarrier("FulltextFilteredEvents"); diff --git a/gepi/gepi-core/src/test/resources/dockercontext/elasticsearch-mapper-preanalyzed-7.0.1-SNAPSHOT.zip b/gepi/gepi-core/src/test/resources/dockercontext/elasticsearch-mapper-preanalyzed-7.0.1-SNAPSHOT.zip deleted file mode 100644 index 50638002..00000000 Binary files a/gepi/gepi-core/src/test/resources/dockercontext/elasticsearch-mapper-preanalyzed-7.0.1-SNAPSHOT.zip and /dev/null differ diff --git a/gepi/gepi-core/src/test/resources/testconfiguration.properties b/gepi/gepi-core/src/test/resources/testconfiguration.properties index 8832ca38..65732a5b 100644 --- a/gepi/gepi-core/src/test/resources/testconfiguration.properties +++ b/gepi/gepi-core/src/test/resources/testconfiguration.properties @@ -1,6 +1,6 @@ #The port number is automatically set in 
de.julielab.gepi.core.retrieval.services.EventRetrievalServiceIntegrationTest -#Thu Oct 20 08:26:15 CEST 2022 +#Thu Oct 20 08:37:47 CEST 2022 elasticquery.url=localhost elasticquery.clustername=gepi_testcluster -elasticquery.port=61639 +elasticquery.port=63000 gepi.documents.index.name=gepi_testindex diff --git a/gepi/gepi-indexing/gepi-indexing-base/src/main/resources/elasticSearchMapping.json b/gepi/gepi-indexing/gepi-indexing-base/src/main/resources/elasticSearchMapping.json index b6cba8c6..b68b7176 100644 --- a/gepi/gepi-indexing/gepi-indexing-base/src/main/resources/elasticSearchMapping.json +++ b/gepi/gepi-indexing/gepi-indexing-base/src/main/resources/elasticSearchMapping.json @@ -196,6 +196,14 @@ "term_vector": "with_positions_offsets", "norms": false }, + "text_filter": { + "type": "alias", + "path": "sentence.text" + }, + "text_trigger": { + "type": "alias", + "path": "sentence.text" + }, "id": { "type": "keyword", "store": true @@ -215,6 +223,14 @@ "term_vector": "with_positions_offsets", "norms": false }, + "text_filter": { + "type": "alias", + "path": "paragraph.text" + }, + "text_trigger": { + "type": "alias", + "path": "paragraph.text" + }, "id": { "type": "keyword", "store": true diff --git a/gepi/gepi-indexing/gepi-indexing-pubmed/descAll/JCoRe XMI Database Multiplier Reader.xml b/gepi/gepi-indexing/gepi-indexing-pubmed/descAll/JCoRe XMI Database Multiplier Reader.xml index 1753e18b..4f873741 100644 --- a/gepi/gepi-indexing/gepi-indexing-pubmed/descAll/JCoRe XMI Database Multiplier Reader.xml +++ b/gepi/gepi-indexing/gepi-indexing-pubmed/descAll/JCoRe XMI Database Multiplier Reader.xml @@ -363,7 +363,7 @@ gnormplus:de.julielab.jcore.types.Gene - de.julielab.jcore.types.PennBioIEPosTag + de.julielab.jcore.types.PennBioIEPOSTag diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java index 8c836c64..b2263d8c 100644 --- 
a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java @@ -125,8 +125,8 @@ public class GepiInput { @Parameter private String paragraphFilterString; - @Persist @Property + @Persist(TabPersistentField.TAB) private String filterFieldsConnectionOperator; @Property @@ -189,7 +189,8 @@ public SelectModel getEventTypeModel() { void setupRender() { log.warn("{}", inputMode); - filterFieldsConnectionOperator = "AND"; + if (filterFieldsConnectionOperator == null) + filterFieldsConnectionOperator = "AND"; } void onValidateFromInputForm() { diff --git a/gepi/gepi-webapp/src/main/resources/logback.xml b/gepi/gepi-webapp/src/main/resources/logback.xml index 19818892..2c426e5d 100644 --- a/gepi/gepi-webapp/src/main/resources/logback.xml +++ b/gepi/gepi-webapp/src/main/resources/logback.xml @@ -20,20 +20,19 @@ - + + - - - + + + + -->