Skip to content

Commit

Permalink
Fix #163.
Browse files Browse the repository at this point in the history
  • Loading branch information
khituras committed Oct 21, 2022
1 parent 4fd311a commit 38c4a1c
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ public static BoolQuery getClosedQuery(GepiRequestData requestData, Set<String>
BoolQuery filterQuery = new BoolQuery();
BoolClause.Occur sentenceParagraphOccur = requestData.getFilterFieldsConnectionOperator().equalsIgnoreCase("and") ? MUST : SHOULD;
if (!StringUtils.isBlank(requestData.getSentenceFilterString())) {
addFulltextSearchQuery(requestData.getSentenceFilterString(), FIELD_EVENT_SENTENCE, sentenceParagraphOccur, filterQuery);
addFulltextSearchQuery(requestData.getSentenceFilterString(), FIELD_EVENT_SENTENCE_TEXT, sentenceParagraphOccur, filterQuery);
}
if (!StringUtils.isBlank(requestData.getParagraphFilterString())) {
addFulltextSearchQuery(requestData.getParagraphFilterString(), FIELD_EVENT_PARAGRAPH, sentenceParagraphOccur, filterQuery);
addFulltextSearchQuery(requestData.getParagraphFilterString(), FIELD_EVENT_PARAGRAPH_TEXT, sentenceParagraphOccur, filterQuery);
}
if (!StringUtils.isBlank(requestData.getSectionNameFilterString())) {
addFulltextSearchQuery(requestData.getSectionNameFilterString(), FIELD_PARAGRAPH_HEADINGS, FILTER, eventQuery);
Expand Down Expand Up @@ -113,10 +113,10 @@ public static BoolQuery getOpenQuery(GepiRequestData requestData) throws Interru
BoolQuery filterQuery = new BoolQuery();
BoolClause.Occur sentenceParagraphOccur = requestData.getFilterFieldsConnectionOperator().equalsIgnoreCase("and") ? MUST : SHOULD;
if (!StringUtils.isBlank(sentenceFilter)) {
addFulltextSearchQuery(sentenceFilter, FIELD_EVENT_SENTENCE, sentenceParagraphOccur, filterQuery);
addFulltextSearchQuery(sentenceFilter, FIELD_EVENT_SENTENCE_TEXT, sentenceParagraphOccur, filterQuery);
}
if (!StringUtils.isBlank(paragraphFilter)) {
addFulltextSearchQuery(paragraphFilter, FIELD_EVENT_PARAGRAPH, sentenceParagraphOccur, filterQuery);
addFulltextSearchQuery(paragraphFilter, FIELD_EVENT_PARAGRAPH_TEXT, sentenceParagraphOccur, filterQuery);
}
if (!StringUtils.isBlank(sectionNameFilter)) {
addFulltextSearchQuery(sectionNameFilter, FIELD_PARAGRAPH_HEADINGS, FILTER, eventQuery);
Expand Down Expand Up @@ -147,10 +147,10 @@ public static BoolQuery getFulltextQuery(List<String> eventTypes, String sentenc
BoolQuery fulltextQuery = new BoolQuery();
BoolClause.Occur filterFieldsOccur = filterFieldsConnectionOperator.equalsIgnoreCase("and") ? MUST : BoolClause.Occur.SHOULD;
if (!StringUtils.isBlank(sentenceFilter)) {
addFulltextSearchQuery(sentenceFilter, FIELD_EVENT_SENTENCE, filterFieldsOccur, fulltextQuery);
addFulltextSearchQuery(sentenceFilter, FIELD_EVENT_SENTENCE_TEXT, filterFieldsOccur, fulltextQuery);
}
if (!StringUtils.isBlank(paragraphFilter)) {
addFulltextSearchQuery(paragraphFilter, FIELD_EVENT_PARAGRAPH, filterFieldsOccur, fulltextQuery);
addFulltextSearchQuery(paragraphFilter, FIELD_EVENT_PARAGRAPH_TEXT, filterFieldsOccur, fulltextQuery);
}
if (!StringUtils.isBlank(sectionNameFilter)) {
addFulltextSearchQuery(sectionNameFilter, FIELD_PARAGRAPH_HEADINGS, filterFieldsOccur, eventQuery);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import org.slf4j.Logger;

import java.util.*;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import static de.julielab.gepi.core.retrieval.services.EventRetrievalService.*;
Expand Down Expand Up @@ -73,10 +75,14 @@ private Stream<Event> resultDocuments2Events(Stream<ISearchServerDocument> docum
List<Object> matchTypes = eventDocument.getFieldValues(FIELD_EVENT_ARG_MATCH_TYPES).orElse(Collections.emptyList());
Optional<String> mainEventType = eventDocument.get(FIELD_EVENT_MAINEVENTTYPE);
Optional<Integer> likelihood = eventDocument.get(FIELD_EVENT_LIKELIHOOD);
Optional<String> sentence = eventDocument.get(FIELD_EVENT_SENTENCE);
Optional<String> paragraph = eventDocument.get(FIELD_EVENT_PARAGRAPH);
List<String> sentenceHl = eventDocument.getHighlights().get(FIELD_EVENT_SENTENCE);
List<String> paragraphHl = eventDocument.getHighlights().get(FIELD_EVENT_PARAGRAPH);
Optional<String> sentence = eventDocument.get(FIELD_EVENT_SENTENCE_TEXT);
Optional<String> paragraph = eventDocument.get(FIELD_EVENT_PARAGRAPH_TEXT);
List<String> sentenceArgumentHl = eventDocument.getHighlights().get(FIELD_EVENT_SENTENCE_TEXT);
List<String> sentenceTriggerHl = eventDocument.getHighlights().get(FIELD_EVENT_SENTENCE_TEXT_TRIGGER);
List<String> sentenceFilterHl = eventDocument.getHighlights().get(FIELD_EVENT_SENTENCE_TEXT_FILTER);
List<String> paragraphArgumentHl = eventDocument.getHighlights().get(FIELD_EVENT_PARAGRAPH_TEXT);
List<String> paragraphTriggerHl = eventDocument.getHighlights().get(FIELD_EVENT_PARAGRAPH_TEXT_TRIGGER);
List<String> paragraphFilterHl = eventDocument.getHighlights().get(FIELD_EVENT_PARAGRAPH_TEXT_FILTER);
List<String> geneMappingSources = eventDocument.getFieldValues(FIELD_GENE_MAPPING_SOURCE).orElse(Collections.emptyList()).stream().map(Object::toString).collect(Collectors.toList());
String eventId = eventDocument.getId();

Expand Down Expand Up @@ -115,12 +121,12 @@ private Stream<Event> resultDocuments2Events(Stream<ISearchServerDocument> docum
if (mainEventType.isPresent())
event.setMainEventType(mainEventType.get());
event.setAllEventTypes(allEventTypes.stream().map(String.class::cast).collect(Collectors.toList()));
if (sentenceHl != null && !sentenceHl.isEmpty())
event.setHlSentence(StringUtils.normalizeSpace(sentenceHl.get(0)));
if (sentenceArgumentHl != null && !sentenceArgumentHl.isEmpty())
event.setHlSentence(StringUtils.normalizeSpace(sentenceArgumentHl.get(0)));
if (sentence.isPresent())
event.setSentence(StringUtils.normalizeSpace(sentence.get()));
if (paragraphHl != null && !paragraphHl.isEmpty())
event.setHlParagraph(StringUtils.normalizeSpace(paragraphHl.get(0)));
if (paragraphFilterHl != null && !paragraphFilterHl.isEmpty())
event.setHlParagraph(StringUtils.normalizeSpace(paragraphFilterHl.get(0)));
if (paragraph.isPresent())
event.setParagraph(StringUtils.normalizeSpace(paragraph.get()));
for (int i = 0; i < event.getNumArguments(); i++) {
Expand All @@ -134,18 +140,74 @@ private Stream<Event> resultDocuments2Events(Stream<ISearchServerDocument> docum
}
if (event.getHlSentence() != null) {
Matcher fulltextQueryHighlightedMatcher = FULLTEXT_QUERY_HIGHLIGHT_PATTERN.matcher(event.getHlSentence());
if (fulltextQueryHighlightedMatcher.find())
event.setSentenceMatchingFulltextQuery(true);
// if (fulltextQueryHighlightedMatcher.find())
// event.setSentenceMatchingFulltextQuery(true);
}
if (event.getHlParagraph() != null) {
Matcher fulltextQueryHighlightedMatcher = FULLTEXT_QUERY_HIGHLIGHT_PATTERN.matcher(event.getHlParagraph());
if (fulltextQueryHighlightedMatcher.find())
event.setParagraphMatchingFulltextQuery(true);
// if (fulltextQueryHighlightedMatcher.find())
// event.setParagraphMatchingFulltextQuery(true);
}
event.setSentenceMatchingFulltextQuery(sentenceFilterHl != null && !sentenceFilterHl.isEmpty());
event.setParagraphMatchingFulltextQuery(paragraphFilterHl != null && !paragraphFilterHl.isEmpty());
event.setGeneMappingSources(geneMappingSources);
return event;
}).filter(Objects::nonNull);
}

/**
 * Matches a single markup tag such as <code>&lt;em class="hl-filter"&gt;</code> or <code>&lt;/em&gt;</code>.
 * Compiled once because the pattern is constant.
 */
private static final Pattern HIGHLIGHT_TAG_PATTERN = Pattern.compile("<[^>]+>");

/**
 * <p>Merges different highlightings of the same text into a single string that carries the tags of all of them.</p>
 * <p>Each highlight is expected to be the same plain text with tags inserted. For every tag the position it has
 * in the plain (tag-free) text is determined; the result is the plain text with all tags of all highlights
 * re-inserted at these positions.</p>
 * <p>When several tags fall on the same position, closing tags (those starting with <code>&lt;/</code>) are
 * written before opening tags so that a highlight ending where another one begins is closed first. Opening tags
 * of different highlights keep the order of the arguments, closing tags of different highlights use the reverse
 * order, so highlights covering the identical span are nested properly. Highlights that partially overlap are
 * emitted as they are and may yield crossing tags.</p>
 * <p>NOTE(review): anything matching <code>&lt;[^&gt;]+&gt;</code> is treated as a tag. This assumes the text
 * itself contains no literal <code>&lt;...&gt;</code> sequences - confirm against the highlighter configuration.</p>
 *
 * @param highlights The different highlightings of the same text. Individual elements may be <code>null</code>
 *                   and are skipped.
 * @return The combined highlighted string or <code>null</code> if all input highlights were <code>null</code>.
 * @throws IllegalArgumentException If the highlights do not mark up the same text, i.e. they differ after all
 *                                  tags have been removed.
 */
private String mergeHighlighting(String... highlights) {
    if (highlights == null)
        return null;
    // The tag-free text, taken from the first non-null highlight.
    String plainText = null;
    // Tags of all highlights keyed by their position in the plain text. Several tags on one position are
    // concatenated into one string in the order in which they have to be written.
    SortedMap<Integer, String> closingTags = new TreeMap<>();
    SortedMap<Integer, String> openingTags = new TreeMap<>();
    for (String hl : highlights) {
        if (hl == null)
            continue;
        final Matcher tagMatcher = HIGHLIGHT_TAG_PATTERN.matcher(hl);
        // Collects the text between the tags. Its current length is the position of the next tag in the plain text.
        final StringBuilder hlPlainText = new StringBuilder();
        final Map<Integer, String> hlClosingTags = new HashMap<>();
        final Map<Integer, String> hlOpeningTags = new HashMap<>();
        int textStart = 0;
        while (tagMatcher.find()) {
            hlPlainText.append(hl, textStart, tagMatcher.start());
            textStart = tagMatcher.end();
            final String tag = tagMatcher.group();
            final Map<Integer, String> target = tag.startsWith("</") ? hlClosingTags : hlOpeningTags;
            // Within one highlight, tags on the same position keep their order of occurrence.
            target.merge(hlPlainText.length(), tag, String::concat);
        }
        hlPlainText.append(hl, textStart, hl.length());

        if (plainText == null)
            plainText = hlPlainText.toString();
        else if (!plainText.contentEquals(hlPlainText))
            throw new IllegalArgumentException("The highlights to merge do not mark up the same text: the text without tags differs between them.");

        // Later highlights are opened after and closed before earlier ones.
        hlOpeningTags.forEach((pos, tags) -> openingTags.merge(pos, tags, String::concat));
        hlClosingTags.forEach((pos, tags) -> closingTags.merge(pos, tags, (earlier, later) -> later + earlier));
    }
    if (plainText == null)
        return null;

    final SortedSet<Integer> tagPositions = new TreeSet<>(closingTags.keySet());
    tagPositions.addAll(openingTags.keySet());

    // Walk through the tag positions from start to end and interleave plain text and tags.
    final StringBuilder merged = new StringBuilder();
    int lastPos = 0;
    for (int pos : tagPositions) {
        merged.append(plainText, lastPos, pos);
        merged.append(closingTags.getOrDefault(pos, ""));
        merged.append(openingTags.getOrDefault(pos, ""));
        lastPos = pos;
    }
    // The text after the last tag; the whole text if there were no tags at all.
    merged.append(plainText, lastPos, plainText.length());
    return merged.toString();
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,17 @@ public class EventRetrievalService implements IEventRetrievalService {

public static final String FIELD_EVENT_ARG2_HOMOLOGY_PREFERRED_NAME = "argument2homoprefname";

public static final String FIELD_EVENT_SENTENCE = "sentence.text";
public static final String FIELD_EVENT_SENTENCE_TEXT = "sentence.text";

public static final String FIELD_EVENT_PARAGRAPH = "paragraph.text";
public static final String FIELD_EVENT_PARAGRAPH_TEXT = "paragraph.text";

public static final String FIELD_EVENT_SENTENCE_TEXT_FILTER = "sentence.text_filter";

public static final String FIELD_EVENT_PARAGRAPH_TEXT_FILTER = "paragraph.text_filter";

public static final String FIELD_EVENT_SENTENCE_TEXT_TRIGGER = "sentence.text_trigger";

public static final String FIELD_EVENT_PARAGRAPH_TEXT_TRIGGER = "paragraph.text_trigger";

public static final String FIELD_PARAGRAPH_HEADINGS = "paragraph.headings";

Expand All @@ -104,7 +112,7 @@ public class EventRetrievalService implements IEventRetrievalService {
FIELD_PMID,
FIELD_PMCID,
FIELD_EVENT_LIKELIHOOD,
FIELD_EVENT_SENTENCE,
FIELD_EVENT_SENTENCE_TEXT,
FIELD_EVENT_MAINEVENTTYPE,
FIELD_EVENT_ALL_EVENTTYPES,
FIELD_EVENT_ARG_GENE_IDS,
Expand Down Expand Up @@ -192,18 +200,7 @@ public CompletableFuture<EventRetrievalResult> closedSearch(GepiRequestData requ
if (!forCharts)
serverRqst.addSortCommand("_doc", SortOrder.ASCENDING);
if (!forCharts) {
HighlightCommand hlc = new HighlightCommand();
hlc.addField(FIELD_EVENT_SENTENCE, 10, 0);
hlc.addField(FIELD_EVENT_PARAGRAPH, 10, 0);
hlc.fields.forEach(f -> {
f.pre = "<b>";
f.post = "</b>";
TermQuery tq = new TermQuery();
tq.field = f.field;
tq.term = "xargumentx";
f.highlightQuery = tq;
});
serverRqst.addHighlightCmd(hlc);
addHighlighting(serverRqst);
}

ElasticSearchCarrier<ElasticServerResponse> carrier = new ElasticSearchCarrier<>("BipartiteEvents");
Expand Down Expand Up @@ -324,20 +321,40 @@ public SearchServerRequest getOpenSearchRequest(GepiRequestData requestData, int
if (!forCharts)
serverRqst.addSortCommand("_doc", SortOrder.ASCENDING);
if (!forCharts) {
HighlightCommand hlc = new HighlightCommand();
hlc.addField(FIELD_EVENT_SENTENCE, 10, 0);
hlc.addField(FIELD_EVENT_PARAGRAPH, 10, 0);
hlc.fields.forEach(f -> {
f.pre = "<b>";
f.post = "</b>";
addHighlighting(serverRqst);
}
return serverRqst;
}

/**
 * <p>Adds the three GePI highlight commands to the given request: one for the <code>xargumentx</code> term
 * with class <code>hl-argument</code>, one for the <code>xtriggerx</code> term with class
 * <code>hl-trigger</code> and one without a fixed term with class <code>hl-filter</code>. Each command covers
 * the respective sentence and paragraph field.</p>
 *
 * @param serverRqst The search request to add the highlight commands to.
 */
private void addHighlighting(SearchServerRequest serverRqst) {
    final HighlightCommand argumentHighlighting = getHighlightCommand("xargumentx", "hl-argument", FIELD_EVENT_SENTENCE_TEXT, FIELD_EVENT_PARAGRAPH_TEXT);
    serverRqst.addHighlightCmd(argumentHighlighting);

    final HighlightCommand triggerHighlighting = getHighlightCommand("xtriggerx", "hl-trigger", FIELD_EVENT_SENTENCE_TEXT_TRIGGER, FIELD_EVENT_PARAGRAPH_TEXT_TRIGGER);
    serverRqst.addHighlightCmd(triggerHighlighting);

    // No fixed highlight term for the filter fields.
    final HighlightCommand filterHighlighting = getHighlightCommand(null, "hl-filter", FIELD_EVENT_SENTENCE_TEXT_FILTER, FIELD_EVENT_PARAGRAPH_TEXT_FILTER);
    serverRqst.addHighlightCmd(filterHighlighting);
}

/**
* <p>Creates a highlight command as required for GePI searches.</p>
* <p>Every given field is added to the command and gets <code>&lt;em class="hlClass"&gt;</code> as pre-tag and <code>&lt;/em&gt;</code> as post-tag.</p>
* <p>When <code>hlTerm</code> is not null, it is used in a <code>TermQuery</code> that is specified as the highlight query of each field. This is used to highlight only special terms like event argument and event trigger words. The respective placeholder terms - <code>xargumentx</code> and <code>xtriggerx</code> - have been added in the <code>RelationDocumentGenerator</code> in the indexing code. If <code>hlTerm</code> is null, no highlight query is set; the actual query terms are highlighted then.</p>
*
* @param hlTerm   The term to restrict highlighting to, or <code>null</code> to set no highlight query.
* @param hlClass  The value of the <code>class</code> attribute of the <code>em</code> pre-tag.
* @param hlFields The names of the fields to highlight. Each is added with the arguments <code>1, 0</code> (presumably fragment number and fragment size - TODO confirm against <code>HighlightCommand.addField</code>).
* @return The highlight command for the given fields.
*/
private HighlightCommand getHighlightCommand(String hlTerm, String hlClass, String... hlFields) {
HighlightCommand hlc = new HighlightCommand();
for (String hlField : hlFields)
hlc.addField(hlField, 1, 0);
hlc.fields.forEach(f -> {
f.pre = "<em class=\"" + hlClass + "\">";
f.post = "</em>";
if (hlTerm != null) {
TermQuery tq = new TermQuery();
tq.field = f.field;
// NOTE(review): this assignment is overwritten by the next line; it looks like a leftover of the former hard-coded term - confirm and remove.
tq.term = "xargumentx";
tq.term = hlTerm;
f.highlightQuery = tq;
// NOTE(review): the next five lines use serverRqst, which is not a parameter of this method, and close the lambda early; they look like remnants of the replaced inline highlighting code - confirm.
});
serverRqst.addHighlightCmd(hlc);
}
return serverRqst;
}
});
return hlc;
}


Expand All @@ -360,18 +377,7 @@ public CompletableFuture<EventRetrievalResult> getFulltextFilteredEvents(GepiReq
if (!forCharts)
serverRqst.addSortCommand("_doc", SortOrder.ASCENDING);
if (!forCharts) {
HighlightCommand hlc = new HighlightCommand();
hlc.addField(FIELD_EVENT_SENTENCE, 10, 0);
hlc.addField(FIELD_EVENT_PARAGRAPH, 10, 0);
hlc.fields.forEach(f -> {
f.pre = "<b>";
f.post = "</b>";
TermQuery tq = new TermQuery();
tq.field = f.field;
tq.term = "xargumentx";
f.highlightQuery = tq;
});
serverRqst.addHighlightCmd(hlc);
addHighlighting(serverRqst);
}

ElasticSearchCarrier<ElasticServerResponse> carrier = new ElasticSearchCarrier("FulltextFilteredEvents");
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#The port number is automatically set in de.julielab.gepi.core.retrieval.services.EventRetrievalServiceIntegrationTest
#Thu Oct 20 08:26:15 CEST 2022
#Thu Oct 20 08:37:47 CEST 2022
elasticquery.url=localhost
elasticquery.clustername=gepi_testcluster
elasticquery.port=61639
elasticquery.port=63000
gepi.documents.index.name=gepi_testindex
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@
"term_vector": "with_positions_offsets",
"norms": false
},
"text_filter": {
"type": "alias",
"path": "sentence.text"
},
"text_trigger": {
"type": "alias",
"path": "sentence.text"
},
"id": {
"type": "keyword",
"store": true
Expand All @@ -215,6 +223,14 @@
"term_vector": "with_positions_offsets",
"norms": false
},
"text_filter": {
"type": "alias",
"path": "paragraph.text"
},
"text_trigger": {
"type": "alias",
"path": "paragraph.text"
},
"id": {
"type": "keyword",
"store": true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@

<string>gnormplus:de.julielab.jcore.types.Gene</string>

<string>de.julielab.jcore.types.PennBioIEPosTag</string>
<string>de.julielab.jcore.types.PennBioIEPOSTag</string>

</array>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ public class GepiInput {
@Parameter
private String paragraphFilterString;

@Persist
@Property
@Persist(TabPersistentField.TAB)
private String filterFieldsConnectionOperator;

@Property
Expand Down Expand Up @@ -189,7 +189,8 @@ public SelectModel getEventTypeModel() {

void setupRender() {
log.warn("{}", inputMode);
filterFieldsConnectionOperator = "AND";
if (filterFieldsConnectionOperator == null)
filterFieldsConnectionOperator = "AND";
}

void onValidateFromInputForm() {
Expand Down
Loading

0 comments on commit 38c4a1c

Please sign in to comment.