diff --git a/pom.xml b/pom.xml
index f4dd850..33d2950 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
<groupId>org.icatproject</groupId>
<artifactId>icat.lucene</artifactId>
- <version>2.0.3-SNAPSHOT</version>
+ <version>3.0.0-SNAPSHOT</version>
<packaging>war</packaging>
<name>ICAT Lucene</name>
@@ -14,7 +14,7 @@
https://repo.icatproject.org/repo
github
https://github.com/icatproject/icat.lucene
- <luceneVersion>5.5.5</luceneVersion>
+ <luceneVersion>8.11.2</luceneVersion>
@@ -86,6 +86,12 @@
<version>${luceneVersion}</version>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-facet</artifactId>
+ <version>${luceneVersion}</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-backward-codecs</artifactId>
@@ -102,7 +108,7 @@
<groupId>org.icatproject</groupId>
<artifactId>icat.utils</artifactId>
- <version>4.16.1</version>
+ <version>4.17.0-SNAPSHOT</version>
@@ -330,6 +336,3 @@
Exposes lucene calls to an icat server
-
-
-
diff --git a/src/main/config/run.properties.example b/src/main/config/run.properties.example
index b010790..7702881 100644
--- a/src/main/config/run.properties.example
+++ b/src/main/config/run.properties.example
@@ -1,6 +1,16 @@
# Real comments in this file are marked with '#' whereas commented out lines
# are marked with '!'
-directory = ${HOME}/data/lucene
-commitSeconds = 5
-ip = 127.0.0.1/32
+directory = ${HOME}/data/search
+commitSeconds = 5
+maxShardSize = 2147483648
+ip = 127.0.0.1/32
+# A search taking longer than this will be cancelled to avoid blocking other users' searches
+maxSearchTimeSeconds = 5
+# List of units to enable conversion to SI units when querying on numerical parameters
+!units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin
+# List of fields that should be stored for facet filtering when searching
+# In order to be available, these fields must be set when indexing the data
+facetFields = datafileFormat.name instrument.name sample.type.name stringValue technique.name type.name
+# Aggregate file sizes and counts in real time (this will have a performance impact on write operations)
+aggregateFiles = false
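
The facetFields property above is a space-separated list consulted at indexing time so that the named fields are also stored as facetable keywords. A minimal sketch of splitting that value, assuming a plain java.util.Properties load and the hypothetical class name RunPropertiesSketch; the service itself reads run.properties through CheckedProperties from icat.utils.

    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.Arrays;
    import java.util.List;
    import java.util.Properties;

    public class RunPropertiesSketch {
        public static void main(String[] args) throws Exception {
            // Load run.properties and split the space-separated facetFields value
            Properties properties = new Properties();
            try (InputStream stream = Files.newInputStream(Paths.get("run.properties"))) {
                properties.load(stream);
            }
            List<String> facetFields = Arrays.asList(
                    properties.getProperty("facetFields", "").trim().split("\\s+"));
            System.out.println(facetFields);
        }
    }
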
diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java
new file mode 100644
index 0000000..42f0e87
--- /dev/null
+++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java
@@ -0,0 +1,127 @@
+package org.icatproject.lucene;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
+import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
+import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
+
+public class DocumentMapping {
+
+ /**
+ * Represents the parent child relationship between two ICAT entities.
+ */
+ public static class ParentRelationship {
+ public String parentName;
+ public String joiningField;
+ public Set<String> fields;
+
+ /**
+ * @param parentName Name of the parent entity.
+ * @param joiningField Field that joins the child to its parent.
+ * @param fields Fields that should be updated by this relationship.
+ */
+ public ParentRelationship(String parentName, String joiningField, String... fields) {
+ this.parentName = parentName;
+ this.joiningField = joiningField;
+ this.fields = new HashSet<>(Arrays.asList(fields));
+ }
+ }
+
+ private static Analyzer analyzer = new IcatSynonymAnalyzer();
+
+ public static final Set<String> doubleFields = new HashSet<>();
+ public static final Set<String> longFields = new HashSet<>();
+ public static final Set<String> sortFields = new HashSet<>();
+ public static final Set<String> textFields = new HashSet<>();
+ public static final Set<String> indexedEntities = new HashSet<>();
+ public static final Map<String, ParentRelationship[]> relationships = new HashMap<>();
+
+ public static final StandardQueryParser genericParser = buildParser();
+ public static final StandardQueryParser datafileParser = buildParser("name", "description", "location",
+ "datafileFormat.name", "visitId", "sample.name", "sample.type.name", "doi");
+ public static final StandardQueryParser datasetParser = buildParser("name", "description", "sample.name",
+ "sample.type.name", "type.name", "visitId", "doi");
+ public static final StandardQueryParser investigationParser = buildParser("name", "visitId", "title", "summary",
+ "facility.name", "type.name", "doi");
+ public static final StandardQueryParser sampleParser = buildParser("sample.name", "sample.type.name");
+
+ static {
+ doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI", "rangeTop", "rangeTopSI", "rangeBottom",
+ "rangeBottomSI"));
+ longFields.addAll(
+ Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate", "fileSize",
+ "fileCount", "datafile.id", "datafileFormat.id", "dataset.id", "facility.id",
+ "facilityCycle.id", "investigation.id", "instrument.id", "id", "sample.id",
+ "sample.investigation.id", "sample.type.id", "technique.id", "type.id", "user.id"));
+ sortFields.addAll(
+ Arrays.asList("datafile.id", "datafileFormat.id", "dataset.id", "facility.id", "facilityCycle.id",
+ "investigation.id", "instrument.id", "id", "sample.id", "sample.investigation.id",
+ "technique.id", "type.id", "user.id", "date", "name", "stringValue", "dateTimeValue",
+ "numericValue", "numericValueSI", "fileSize", "fileCount"));
+ textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name",
+ "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name",
+ "sample.type.name", "technique.name", "technique.description", "technique.pid", "title", "summary",
+ "facility.name", "user.fullName", "type.name", "doi"));
+
+ indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter",
+ "DatasetParameter", "DatasetTechnique", "InstrumentScientist", "InvestigationFacilityCycle",
+ "InvestigationInstrument", "InvestigationParameter", "InvestigationUser", "Sample", "SampleParameter"));
+
+ relationships.put("Instrument",
+ new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument.id",
+ "instrument.name", "instrument.fullName") });
+ relationships.put("User",
+ new ParentRelationship[] {
+ new ParentRelationship("InvestigationUser", "user.id", "user.name", "user.fullName"),
+ new ParentRelationship("InstrumentScientist", "user.id", "user.name", "user.fullName") });
+ relationships.put("Sample", new ParentRelationship[] {
+ new ParentRelationship("Dataset", "sample.id", "sample.name", "sample.investigation.id"),
+ new ParentRelationship("Datafile", "sample.id", "sample.name", "sample.investigation.id") });
+ relationships.put("SampleType",
+ new ParentRelationship[] { new ParentRelationship("Sample", "type.id", "type.name"),
+ new ParentRelationship("Dataset", "sample.type.id", "sample.type.name"),
+ new ParentRelationship("Datafile", "sample.type.id", "sample.type.name") });
+ relationships.put("InvestigationType",
+ new ParentRelationship[] { new ParentRelationship("Investigation", "type.id", "type.name") });
+ relationships.put("DatasetType",
+ new ParentRelationship[] { new ParentRelationship("Dataset", "type.id", "type.name") });
+ relationships.put("DatafileFormat",
+ new ParentRelationship[] {
+ new ParentRelationship("Datafile", "datafileFormat.id", "datafileFormat.name") });
+ relationships.put("Facility",
+ new ParentRelationship[] { new ParentRelationship("Investigation", "facility.id", "facility.name") });
+ relationships.put("ParameterType",
+ new ParentRelationship[] { new ParentRelationship("DatafileParameter", "type.id", "type.name"),
+ new ParentRelationship("DatasetParameter", "type.id", "type.name"),
+ new ParentRelationship("InvestigationParameter", "type.id", "type.name"),
+ new ParentRelationship("SampleParameter", "type.id", "type.name") });
+ relationships.put("Technique",
+ new ParentRelationship[] { new ParentRelationship("DatasetTechnique", "technique.id", "technique.name",
+ "technique.description", "technique.pid") });
+ relationships.put("Investigation",
+ new ParentRelationship[] {
+ new ParentRelationship("Dataset", "investigation.id", "investigation.name",
+ "investigation.title", "investigation.startDate", "visitId"),
+ new ParentRelationship("datafile", "investigation.id", "investigation.name", "visitId") });
+ relationships.put("Dataset",
+ new ParentRelationship[] { new ParentRelationship("Datafile", "dataset.id", "dataset.name") });
+ }
+
+ private static StandardQueryParser buildParser(String... defaultFields) {
+ StandardQueryParser parser = new StandardQueryParser();
+ StandardQueryConfigHandler qpConf = (StandardQueryConfigHandler) parser.getQueryConfigHandler();
+ qpConf.set(ConfigurationKeys.ANALYZER, analyzer);
+ qpConf.set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, true);
+ if (defaultFields.length > 0) {
+ qpConf.set(ConfigurationKeys.MULTI_FIELDS, defaultFields);
+ }
+
+ return parser;
+ }
+}
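
DocumentMapping builds one StandardQueryParser per searchable entity, sharing the IcatSynonymAnalyzer and, for the entity-specific parsers, a MULTI_FIELDS list of default fields. A minimal usage sketch, assuming the hypothetical class name ParserSketch and illustrative query strings; passing null as the default field lets the configured MULTI_FIELDS expand unqualified terms.

    package org.icatproject.lucene;

    import org.apache.lucene.search.Query;

    public class ParserSketch {
        public static void main(String[] args) throws Exception {
            // Field-qualified term parsed with the generic parser ("text" is just an
            // illustrative default field, not one defined by DocumentMapping)
            Query byField = DocumentMapping.genericParser.parse("title:neutron", "text");
            // The entity parsers carry MULTI_FIELDS, so an unqualified term is expanded
            // across their default fields (name, description, location, ...)
            Query acrossFields = DocumentMapping.datafileParser.parse("neutron", null);
            System.out.println(byField + "\n" + acrossFields);
        }
    }
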
diff --git a/src/main/java/org/icatproject/lucene/FacetedDimension.java b/src/main/java/org/icatproject/lucene/FacetedDimension.java
new file mode 100644
index 0000000..bfd1e7f
--- /dev/null
+++ b/src/main/java/org/icatproject/lucene/FacetedDimension.java
@@ -0,0 +1,109 @@
+package org.icatproject.lucene;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import jakarta.json.Json;
+import jakarta.json.JsonObjectBuilder;
+
+import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.facet.LabelAndValue;
+import org.apache.lucene.facet.range.DoubleRange;
+import org.apache.lucene.facet.range.LongRange;
+import org.apache.lucene.facet.range.Range;
+
+/**
+ * For a single dimension (field), stores labels (the unique values or ranges of
+ * values for that field in the index) and their respective counts (the number
+ * of times that label appears in different documents).
+ *
+ * For example, a dimension might be "colour", the label "red", and the count 5.
+ */
+public class FacetedDimension {
+
+ private String dimension;
+ private List<Range> ranges;
+ private List<String> labels;
+ private List<Long> counts;
+
+ /**
+ * Creates an "empty" FacetedDimension. The dimension (field) is set but ranges,
+ * labels and counts are not.
+ *
+ * @param dimension The dimension, or field, to be faceted
+ */
+ public FacetedDimension(String dimension) {
+ this.dimension = dimension;
+ this.ranges = new ArrayList<>();
+ this.labels = new ArrayList<>();
+ this.counts = new ArrayList<>();
+ }
+
+ /**
+ * Extracts the count for each label in the FacetResult. If the label has
+ * already been encountered, the count is incremented rather than being
+ * overridden. Essentially, this allows faceting to be performed across multiple
+ * shards.
+ *
+ * @param facetResult A Lucene FacetResult object corresponding to the relevant
+ * dimension
+ */
+ public void addResult(FacetResult facetResult) {
+ for (LabelAndValue labelAndValue : facetResult.labelValues) {
+ String label = labelAndValue.label;
+ int labelIndex = labels.indexOf(label);
+ if (labelIndex == -1) {
+ labels.add(label);
+ counts.add(labelAndValue.value.longValue());
+ } else {
+ counts.set(labelIndex, counts.get(labelIndex) + labelAndValue.value.longValue());
+ }
+ }
+ }
+
+ /**
+ * Formats the labels and counts into Json.
+ *
+ * @param aggregationsBuilder The JsonObjectBuilder to add the facets for this
+ * dimension to.
+ */
+ public void buildResponse(JsonObjectBuilder aggregationsBuilder) {
+ JsonObjectBuilder bucketsBuilder = Json.createObjectBuilder();
+ for (int i = 0; i < labels.size(); i++) {
+ JsonObjectBuilder bucketBuilder = Json.createObjectBuilder();
+ bucketBuilder.add("doc_count", counts.get(i));
+ if (ranges.size() > i) {
+ Range range = ranges.get(i);
+ if (range.getClass().getSimpleName().equals("LongRange")) {
+ bucketBuilder.add("from", ((LongRange) range).min);
+ bucketBuilder.add("to", ((LongRange) range).max);
+ } else if (range.getClass().getSimpleName().equals("DoubleRange")) {
+ bucketBuilder.add("from", ((DoubleRange) range).min);
+ bucketBuilder.add("to", ((DoubleRange) range).max);
+ }
+ }
+ bucketsBuilder.add(labels.get(i), bucketBuilder);
+ }
+ aggregationsBuilder.add(dimension, Json.createObjectBuilder().add("buckets", bucketsBuilder));
+ }
+
+ /**
+ * @return The list of Lucene Range Objects for use with numerical facets.
+ * For String faceting, this will be empty.
+ */
+ public List getRanges() {
+ return ranges;
+ }
+
+ /**
+ * @return The dimension that these labels and counts correspond to.
+ */
+ public String getDimension() {
+ return dimension;
+ }
+
+ public String toString() {
+ return dimension + ": " + labels + ", " + counts;
+ }
+
+}
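
FacetedDimension accumulates label counts from one FacetResult per shard and then serialises them in an Elasticsearch-like "buckets" shape. A minimal sketch, assuming a hand-built FacetResult with made-up labels and counts; in the service the results come from Lucene's facet counting classes.

    package org.icatproject.lucene;

    import jakarta.json.Json;
    import jakarta.json.JsonObjectBuilder;
    import org.apache.lucene.facet.FacetResult;
    import org.apache.lucene.facet.LabelAndValue;

    public class FacetedDimensionSketch {
        public static void main(String[] args) {
            // Counts as a single shard might report them for one dimension
            LabelAndValue[] labelValues = { new LabelAndValue("hdf5", 5), new LabelAndValue("nexus", 2) };
            FacetResult shardResult = new FacetResult("datafileFormat.name", new String[0], 7, labelValues, 2);

            FacetedDimension dimension = new FacetedDimension("datafileFormat.name");
            dimension.addResult(shardResult); // call once per shard; repeated labels accumulate
            JsonObjectBuilder aggregations = Json.createObjectBuilder();
            dimension.buildResponse(aggregations);
            // Prints something like:
            // {"datafileFormat.name":{"buckets":{"hdf5":{"doc_count":5},"nexus":{"doc_count":2}}}}
            System.out.println(aggregations.build());
        }
    }
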
diff --git a/src/main/java/org/icatproject/lucene/Field.java b/src/main/java/org/icatproject/lucene/Field.java
new file mode 100644
index 0000000..ad24647
--- /dev/null
+++ b/src/main/java/org/icatproject/lucene/Field.java
@@ -0,0 +1,184 @@
+package org.icatproject.lucene;
+
+import jakarta.json.JsonObject;
+
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
+
+/**
+ * Wrapper for the name, value and type (String/Text, long, double) of a field
+ * to be added to a Lucene Document.
+ */
+class Field {
+
+ private abstract class InnerField {
+
+ public abstract void addSortable(Document document) throws NumberFormatException;
+
+ public abstract void addToDocument(Document document) throws NumberFormatException;
+
+ }
+
+ private class InnerStringField extends InnerField {
+
+ private String value;
+
+ public InnerStringField(String value) {
+ this.value = value;
+ }
+
+ @Override
+ public void addSortable(Document document) throws NumberFormatException {
+ if (DocumentMapping.sortFields.contains(name)) {
+ document.add(new SortedDocValuesField(name, new BytesRef(value)));
+ }
+ }
+
+ @Override
+ public void addToDocument(Document document) throws NumberFormatException {
+ addSortable(document);
+
+ if (facetable) {
+ document.add(new SortedSetDocValuesFacetField(name + ".keyword", value));
+ document.add(new StringField(name + ".keyword", value, Store.NO));
+ }
+
+ if (DocumentMapping.textFields.contains(name)) {
+ document.add(new TextField(name, value, Store.YES));
+ } else {
+ document.add(new StringField(name, value, Store.YES));
+ }
+
+ }
+
+ }
+
+ private class InnerLongField extends InnerField {
+
+ private long value;
+
+ public InnerLongField(long value) {
+ this.value = value;
+ }
+
+ @Override
+ public void addSortable(Document document) throws NumberFormatException {
+ if (DocumentMapping.sortFields.contains(name)) {
+ document.add(new NumericDocValuesField(name, value));
+ }
+ }
+
+ @Override
+ public void addToDocument(Document document) throws NumberFormatException {
+ addSortable(document);
+ document.add(new LongPoint(name, value));
+ document.add(new StoredField(name, value));
+ }
+
+ }
+
+ private class InnerDoubleField extends InnerField {
+
+ private double value;
+
+ public InnerDoubleField(double value) {
+ this.value = value;
+ }
+
+ @Override
+ public void addSortable(Document document) throws NumberFormatException {
+ if (DocumentMapping.sortFields.contains(name)) {
+ long sortableLong = NumericUtils.doubleToSortableLong(value);
+ document.add(new NumericDocValuesField(name, sortableLong));
+ }
+ }
+
+ @Override
+ public void addToDocument(Document document) throws NumberFormatException {
+ addSortable(document);
+ document.add(new DoublePoint(name, value));
+ document.add(new StoredField(name, value));
+ }
+
+ }
+
+ private String name;
+ private InnerField innerField;
+ private boolean facetable;
+
+ /**
+ * Creates a wrapper for a Field.
+ *
+ * @param object JsonObject containing representations of multiple fields
+ * @param key Key of a specific field in object
+ * @param facetFields List of String field names which should be stored as a facetable keyword
+ */
+ public Field(JsonObject object, String key, List<String> facetFields) {
+ name = key;
+ facetable = facetFields.contains(name);
+ if (DocumentMapping.doubleFields.contains(name)) {
+ innerField = new InnerDoubleField(object.getJsonNumber(name).doubleValue());
+ } else if (DocumentMapping.longFields.contains(name)) {
+ innerField = new InnerLongField(object.getJsonNumber(name).longValueExact());
+ } else {
+ innerField = new InnerStringField(object.getString(name));
+ }
+ }
+
+ /**
+ * Creates a wrapper for a Field.
+ *
+ * @param indexableField A Lucene IndexableField
+ * @param facetFields List of String fields which should be stored as a facetable keyword
+ */
+ public Field(IndexableField indexableField, List<String> facetFields) {
+ name = indexableField.name();
+ facetable = facetFields.contains(name);
+ if (DocumentMapping.doubleFields.contains(name)) {
+ innerField = new InnerDoubleField(indexableField.numericValue().doubleValue());
+ } else if (DocumentMapping.longFields.contains(name)) {
+ innerField = new InnerLongField(indexableField.numericValue().longValue());
+ } else {
+ innerField = new InnerStringField(indexableField.stringValue());
+ }
+ }
+
+ /**
+ * Adds a sortable field to the passed document. This only accounts for sorting;
+ * if storage and searchability are also needed, see {@link #addToDocument}. The
+ * exact implementation depends on whether this is a String, long or double
+ * field.
+ *
+ * @param document The document to add to
+ * @throws NumberFormatException
+ */
+ public void addSortable(Document document) throws NumberFormatException {
+ innerField.addSortable(document);
+ }
+
+ /**
+ * Adds this field to the passed document. This accounts for sortable and
+ * facetable fields. The exact implementation depends on whether this is a
+ * String, long or double field.
+ *
+ * @param document The document to add to
+ * @throws NumberFormatException
+ */
+ public void addToDocument(Document document) throws NumberFormatException {
+ innerField.addToDocument(document);
+ }
+
+}
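
Field routes each incoming value to the appropriate Lucene field types based on DocumentMapping and the configured facetFields. A minimal sketch, assuming the hypothetical class FieldSketch placed in the same package (Field is package-private) and made-up field values.

    package org.icatproject.lucene;

    import java.util.Arrays;
    import java.util.List;

    import jakarta.json.Json;
    import jakarta.json.JsonObject;
    import org.apache.lucene.document.Document;

    public class FieldSketch {
        public static void main(String[] args) {
            // A document as it might arrive in a "create" operation (values are made up)
            JsonObject json = Json.createObjectBuilder()
                    .add("name", "datafile_1.nxs")
                    .add("datafileFormat.name", "nexus")
                    .add("fileSize", 1024L)
                    .build();
            List<String> facetFields = Arrays.asList("datafileFormat.name");

            Document document = new Document();
            for (String key : json.keySet()) {
                // "fileSize" is a long field, so it becomes LongPoint + StoredField (plus a
                // NumericDocValuesField because it is sortable); the strings become
                // TextField/StringField, and datafileFormat.name also gets keyword facet fields
                new Field(json, key, facetFields).addToDocument(document);
            }
            System.out.println(document);
        }
    }
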
diff --git a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java
old mode 100644
new mode 100755
index cb6767e..7494b84
--- a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java
+++ b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java
@@ -1,25 +1,41 @@
package org.icatproject.lucene;
+import java.util.Arrays;
+import java.util.List;
+
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
public class IcatAnalyzer extends Analyzer {
+ public static final CharArraySet SCIENTIFIC_STOP_WORDS_SET;
+
+ /**
+ * Do not include (As At Be In No) in the stop words as these are chemical
+ * symbols. Otherwise, the set should match Lucene's ENGLISH_STOP_WORDS_SET
+ */
+ static {
+ final List<String> stopWords =
+ Arrays.asList("a", "an", "and", "are", "but", "by", "for", "if", "into", "is",
+ "it", "not", "on", "or", "such", "that", "the", "their", "then",
+ "there", "these", "they", "this", "to", "was", "will", "with");
+ final CharArraySet stopSet = new CharArraySet(stopWords, false);
+ SCIENTIFIC_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
+ }
+
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new StandardTokenizer();
- TokenStream sink = new StandardFilter(source);
- sink = new EnglishPossessiveFilter(sink);
+ TokenStream sink = new EnglishPossessiveFilter(source);
sink = new LowerCaseFilter(sink);
- sink = new StopFilter(sink, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+ sink = new StopFilter(sink, SCIENTIFIC_STOP_WORDS_SET);
sink = new PorterStemFilter(sink);
return new TokenStreamComponents(source, sink);
}
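
A minimal sketch of how the analyzer tokenises text, assuming the hypothetical class name AnalyzerSketch; it shows stop words being dropped while a chemical symbol such as "As" is kept.

    package org.icatproject.lucene;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class AnalyzerSketch {
        public static void main(String[] args) throws Exception {
            Analyzer analyzer = new IcatAnalyzer();
            try (TokenStream stream = analyzer.tokenStream("text", "The sample was doped with As")) {
                CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
                stream.reset();
                while (stream.incrementToken()) {
                    // "the", "was" and "with" are dropped as stop words, the rest is lower-cased
                    // and stemmed, and "As" survives because chemical symbols are deliberately
                    // kept out of the stop word set
                    System.out.print(term + " ");
                }
                stream.end();
            }
        }
    }
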
diff --git a/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java
new file mode 100755
index 0000000..029f8fc
--- /dev/null
+++ b/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java
@@ -0,0 +1,54 @@
+package org.icatproject.lucene;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.text.ParseException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.synonym.SolrSynonymParser;
+import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+
+public class IcatSynonymAnalyzer extends Analyzer {
+
+ private SynonymMap synonyms;
+
+ public IcatSynonymAnalyzer() {
+ super();
+ // Load synonyms from resource file
+ InputStream in = IcatSynonymAnalyzer.class.getClassLoader().getResourceAsStream("synonym.txt");
+ if (in != null) {
+ BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+ SolrSynonymParser parser = new SolrSynonymParser(true, true, new IcatAnalyzer());
+ try {
+ parser.parse(reader);
+ synonyms = parser.build();
+ } catch (IOException | ParseException e) {
+ // If we cannot parse the synonyms, do nothing
+ // For all intents and purposes this will now act as a plain IcatAnalyzer
+ }
+ }
+ }
+
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new StandardTokenizer();
+ TokenStream sink = new EnglishPossessiveFilter(source);
+ sink = new LowerCaseFilter(sink);
+ sink = new StopFilter(sink, IcatAnalyzer.SCIENTIFIC_STOP_WORDS_SET);
+ sink = new PorterStemFilter(sink);
+ if (synonyms != null) {
+ sink = new SynonymGraphFilter(sink, synonyms, false);
+ }
+ return new TokenStreamComponents(source, sink);
+ }
+}
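
IcatSynonymAnalyzer expects a synonym.txt resource on the classpath in Solr synonym format, as consumed by SolrSynonymParser. A hypothetical example of such a file (not the project's actual synonym list):

    # hypothetical entries, one rule per line
    W, tungsten
    Cu, copper
    led => light emitting diode
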
diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java
old mode 100644
new mode 100755
index fc4dc2b..31efaea
--- a/src/main/java/org/icatproject/lucene/Lucene.java
+++ b/src/main/java/org/icatproject/lucene/Lucene.java
@@ -6,31 +6,35 @@
import java.net.HttpURLConnection;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
+import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Consumer;
import jakarta.annotation.PostConstruct;
import jakarta.annotation.PreDestroy;
import jakarta.ejb.Singleton;
import jakarta.json.Json;
import jakarta.json.JsonArray;
+import jakarta.json.JsonException;
+import jakarta.json.JsonNumber;
import jakarta.json.JsonObject;
+import jakarta.json.JsonObjectBuilder;
import jakarta.json.JsonReader;
-import jakarta.json.JsonString;
-import jakarta.json.JsonValue;
+import jakarta.json.JsonStructure;
import jakarta.json.stream.JsonGenerator;
-import jakarta.json.stream.JsonParser;
-import jakarta.json.stream.JsonParser.Event;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.ws.rs.Consumes;
-import jakarta.ws.rs.DELETE;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.POST;
import jakarta.ws.rs.Path;
@@ -41,36 +45,50 @@
import jakarta.ws.rs.core.MediaType;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DoubleField;
+import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
+import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.facet.Facets;
+import org.apache.lucene.facet.FacetsCollector;
+import org.apache.lucene.facet.FacetsConfig;
+import org.apache.lucene.facet.range.DoubleRange;
+import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
+import org.apache.lucene.facet.range.LongRange;
+import org.apache.lucene.facet.range.LongRangeFacetCounts;
+import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
-import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
-import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.BooleanQuery.Builder;
+import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortField.Type;
+import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.join.JoinUtil;
-import org.apache.lucene.search.join.ScoreMode;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.NumericUtils;
+import org.icatproject.lucene.SearchBucket.SearchType;
import org.icatproject.lucene.exceptions.LuceneException;
import org.icatproject.utils.CheckedProperties;
+import org.icatproject.utils.IcatUnits;
+import org.icatproject.utils.IcatUnits.Value;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
@@ -80,48 +98,291 @@
@Singleton
public class Lucene {
- enum AttributeName {
- type, name, value, date, store
- }
+ /**
+ * A bucket for accessing the read and write functionality for a single "shard"
+ * Lucene index which can then be grouped to represent a single document type.
+ */
+ private class ShardBucket {
+ private FSDirectory directory;
+ private IndexWriter indexWriter;
+ private SearcherManager searcherManager;
+ private DefaultSortedSetDocValuesReaderState state;
+ private AtomicLong documentCount;
+
+ /**
+ * Creates a bucket for accessing the read and write functionality for a single
+ * "shard" Lucene index which can then be grouped to represent a single document
+ * type.
+ *
+ * @param shardPath Path to the directory used as storage for this shard.
+ * @throws IOException
+ */
+ public ShardBucket(java.nio.file.Path shardPath) throws IOException {
+ directory = FSDirectory.open(shardPath);
+ IndexWriterConfig config = new IndexWriterConfig(analyzer);
+ indexWriter = new IndexWriter(directory, config);
+ String[] files = directory.listAll();
+ if (files.length == 1 && files[0].equals("write.lock")) {
+ logger.debug("Directory only has the write.lock file so store and delete a dummy document");
+ Document doc = new Document();
+ doc.add(new StringField("dummy", "dummy", Store.NO));
+ indexWriter.addDocument(facetsConfig.build(doc));
+ indexWriter.commit();
+ indexWriter.deleteDocuments(new Term("dummy", "dummy"));
+ indexWriter.commit();
+ logger.debug("Now have " + indexWriter.getDocStats().numDocs + " documents indexed");
+ }
+ searcherManager = new SearcherManager(indexWriter, null);
+ IndexSearcher indexSearcher = searcherManager.acquire();
+ int numDocs = indexSearcher.getIndexReader().numDocs();
+ documentCount = new AtomicLong(numDocs);
+ initState(indexSearcher);
+ logger.info("Created ShardBucket for directory {} with {} Documents", directory.getDirectory(), numDocs);
+ }
+
+ /**
+ * Commits all pending cached documents to this shard.
+ *
+ * @return The number of documents committed to this shard.
+ * @throws IOException
+ */
+ public int commit() throws IOException {
+ if (indexWriter.hasUncommittedChanges()) {
+ indexWriter.commit();
+ searcherManager.maybeRefreshBlocking();
+ initState(searcherManager.acquire());
+ }
+ return indexWriter.numRamDocs();
+ }
- enum FieldType {
- TextField, StringField, SortedDocValuesField, DoubleField
+ /**
+ * Creates a new DefaultSortedSetDocValuesReaderState object for this shard.
+ * This can be expensive for indices with a large number of faceted dimensions
+ * and labels, so should only be done when needed.
+ *
+ * @param indexSearcher The underlying reader of this searcher is used to build
+ * the state
+ * @throws IOException
+ */
+ private void initState(IndexSearcher indexSearcher) throws IOException {
+ try {
+ state = new DefaultSortedSetDocValuesReaderState(indexSearcher.getIndexReader());
+ } catch (IllegalArgumentException e) {
+ // This can occur if no fields in the index have been faceted, in which case set
+ // state to null to ensure we don't (erroneously) use the old state
+ logger.error(
+ "No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage());
+ state = null;
+ } finally {
+ searcherManager.release(indexSearcher);
+ }
+ }
}
+ /**
+ * A bucket for accessing the high level functionality, such as
+ * searching, for a single document type. Incoming documents will be routed to
+ * one of the individual "shard" indices that are grouped by this Object.
+ */
private class IndexBucket {
- private FSDirectory directory;
- private IndexWriter indexWriter;
- private SearcherManager searcherManager;
+ private String entityName;
+ private List<ShardBucket> shardList = new ArrayList<>();
private AtomicBoolean locked = new AtomicBoolean();
- }
- public class Search {
- public Map map;
- public Query query;
- public ScoreDoc lastDoc;
- }
+ /**
+ * Creates a bucket for accessing the high level functionality, such as
+ * searching, for a single document type. Incoming documents will be routed to
+ * one of the individual "shard" indices that are grouped by this Object.
+ *
+ * @param entityName The name of the entity that this index contains documents
+ * for.
+ */
+ public IndexBucket(String entityName) {
+ try {
+ logger.trace("Initialising bucket for {}", entityName);
+ this.entityName = entityName.toLowerCase();
+ Long shardIndex = 0L;
+ java.nio.file.Path shardPath = luceneDirectory.resolve(entityName);
+ ShardBucket shardBucket;
+ // Create at least one shard, then keep creating them so long as directories
+ // exist and already contain Documents
+ do {
+ shardBucket = new ShardBucket(shardPath);
+ shardList.add(shardBucket);
+ shardIndex++;
+ shardPath = luceneDirectory.resolve(entityName + "_" + shardIndex);
+ } while (shardBucket.documentCount.get() > 0 && Files.isDirectory(shardPath));
+ logger.debug("Bucket for {} is now ready with {} shards", entityName, shardIndex);
+ } catch (Throwable e) {
+ logger.error("Can't continue " + e.getClass() + " " + e.getMessage());
+ }
+ }
- enum When {
- Now, Sometime
- }
+ /**
+ * Acquires IndexSearchers from the SearcherManagers of the individual shards in
+ * this bucket.
+ *
+ * @return List of IndexSearchers for all shards in this bucket.
+ * @throws IOException
+ */
+ public List<IndexSearcher> acquireSearchers() throws IOException {
+ List<IndexSearcher> subSearchers = new ArrayList<>();
+ for (ShardBucket shardBucket : shardList) {
+ logger.trace("Acquiring searcher for shard");
+ subSearchers.add(shardBucket.searcherManager.acquire());
+ }
+ return subSearchers;
+ }
+
+ /**
+ * Adds a document to the appropriate shard for this index.
+ *
+ * @param document The document to be added.
+ * @throws IOException
+ */
+ public void addDocument(Document document) throws IOException {
+ ShardBucket shardBucket = routeShard();
+ shardBucket.indexWriter.addDocument(document);
+ shardBucket.documentCount.incrementAndGet();
+ }
+
+ /**
+ * Deletes a document with the given ICAT id from every shard of this index.
+ *
+ * @param icatId The ICAT id of the document to be deleted.
+ * @throws IOException
+ */
+ public void deleteDocument(long icatId) throws IOException {
+ for (ShardBucket shardBucket : shardList) {
+ shardBucket.indexWriter.deleteDocuments(LongPoint.newExactQuery("id", icatId));
+ }
+ }
- private static final Logger logger = LoggerFactory.getLogger(Lucene.class);
+ /**
+ * Updates the document with the provided ICAT id.
+ *
+ * @param icatId The ICAT id of the document to be updated.
+ * @param document The document that will replace the old document.
+ * @throws IOException
+ */
+ public void updateDocument(long icatId, Document document) throws IOException {
+ deleteDocument(icatId);
+ addDocument(document);
+ }
+
+ /**
+ * Creates a new ShardBucket and adds it to the shardList.
+ *
+ * @param shardKey The identifier for the new shard to be created. For
+ * simplicity, this should be an int starting at 0 and incrementing by 1
+ * for each new shard.
+ * @return A new ShardBucket with the provided shardKey.
+ * @throws IOException
+ */
+ public ShardBucket buildShardBucket(int shardKey) throws IOException {
+ ShardBucket shardBucket = new ShardBucket(luceneDirectory.resolve(entityName + "_" + shardKey));
+ shardList.add(shardBucket);
+ return shardBucket;
+ }
+
+ /**
+ * Commits Documents for writing on all "shard" indices for this bucket.
+ *
+ * @param command The high level command which called this function. Only
+ * used for debug logging.
+ * @param entityName The name of the entities being committed. Only used for
+ * debug logging.
+ * @throws IOException
+ */
+ public void commit(String command, String entityName) throws IOException {
+ for (ShardBucket shardBucket : shardList) {
+ int cached = shardBucket.commit();
+ if (cached != 0) {
+ int numDocs = shardBucket.indexWriter.getDocStats().numDocs;
+ String directoryName = shardBucket.directory.getDirectory().toString();
+ logger.debug("{} has committed {} {} changes to Lucene - now have {} documents indexed in {}",
+ command, cached, entityName, numDocs, directoryName);
+ }
+ }
+ }
+
+ /**
+ * Commits and closes all "shard" indices for this bucket.
+ *
+ * @throws IOException
+ */
+ public void close() throws IOException {
+ for (ShardBucket shardBucket : shardList) {
+ shardBucket.searcherManager.close();
+ shardBucket.indexWriter.commit();
+ shardBucket.indexWriter.close();
+ shardBucket.directory.close();
+ }
+ }
+
+ /**
+ * @return The ShardBucket currently in use for indexing new Documents.
+ */
+ public ShardBucket getCurrentShardBucket() {
+ int size = shardList.size();
+ return shardList.get(size - 1);
+ }
+ /**
+ * Provides the ShardBucket that should be used for writing the next Document.
+ * All Documents up to luceneMaxShardSize are indexed in the first shard, after
+ * that a new shard is created for the next luceneMaxShardSize Documents and so
+ * on.
+ *
+ * @return The ShardBucket that the relevant Document is/should be indexed in.
+ * @throws IOException
+ */
+ public ShardBucket routeShard() throws IOException {
+ ShardBucket shardBucket = getCurrentShardBucket();
+ if (shardBucket.documentCount.get() >= luceneMaxShardSize) {
+ shardBucket.indexWriter.commit();
+ shardBucket = buildShardBucket(shardList.size());
+ }
+ return shardBucket;
+ }
+
+ /**
+ * Releases all provided searchers for the shards in this bucket.
+ *
+ * @param subSearchers List of IndexSearcher, in shard order.
+ * @throws IOException
+ * @throws LuceneException If the number of searchers and shards isn't the same.
+ */
+ public void releaseSearchers(List<IndexSearcher> subSearchers) throws IOException, LuceneException {
+ if (subSearchers.size() != shardList.size()) {
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR,
+ "Was expecting the same number of DirectoryReaders as ShardBuckets, but had "
+ + subSearchers.size() + ", " + shardList.size() + " respectively.");
+ }
+ int i = 0;
+ for (ShardBucket shardBucket : shardList) {
+ shardBucket.searcherManager.release(subSearchers.get(i));
+ i++;
+ }
+ }
+ }
+
+ static final Logger logger = LoggerFactory.getLogger(Lucene.class);
private static final Marker fatal = MarkerFactory.getMarker("FATAL");
+ private static final IcatAnalyzer analyzer = new IcatAnalyzer();
- private java.nio.file.Path luceneDirectory;
+ private final FacetsConfig facetsConfig = new FacetsConfig();
+ private java.nio.file.Path luceneDirectory;
private int luceneCommitMillis;
-
- private AtomicLong bucketNum = new AtomicLong();
+ private long luceneMaxShardSize;
+ private long maxSearchTimeSeconds;
+ private boolean aggregateFiles;
private Map<String, IndexBucket> indexBuckets = new ConcurrentHashMap<>();
- private StandardQueryParser parser;
-
private Timer timer;
- private IcatAnalyzer analyzer;
-
- private Map<Long, Search> searches = new ConcurrentHashMap<>();
+ public List<String> facetFields = new ArrayList<>();
+ public IcatUnits icatUnits;
/**
* return the version of the lucene server
@@ -144,50 +405,28 @@ public String getVersion() {
@Consumes(MediaType.APPLICATION_JSON)
@Path("modify")
public void modify(@Context HttpServletRequest request) throws LuceneException {
-
logger.debug("Requesting modify");
int count = 0;
-
- try (JsonParser parser = Json.createParser(request.getInputStream())) {
-
- Event ev = parser.next();
- if (ev != Event.START_ARRAY) {
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Unexpected " + ev.name());
- }
- ev = parser.next();
-
- while (true) {
- if (ev == Event.END_ARRAY) {
- break;
- }
- if (ev != Event.START_ARRAY) {
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Unexpected " + ev.name());
- }
- ev = parser.next();
- String entityName = parser.getString();
- ev = parser.next();
- Long id = (ev == Event.VALUE_NULL) ? null : parser.getLong();
- ev = parser.next();
- if (ev == Event.VALUE_NULL) {
- try {
- IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> createBucket(k));
- if (bucket.locked.get()) {
- throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
- "Lucene locked for " + entityName);
- }
- bucket.indexWriter.deleteDocuments(new Term("id", Long.toString(id)));
- } catch (IOException e) {
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
- }
+ try (JsonReader reader = Json.createReader(request.getInputStream())) {
+ List<JsonObject> operations = reader.readArray().getValuesAs(JsonObject.class);
+ for (JsonObject operation : operations) {
+ if (operation.size() != 1) {
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "Operation object should only have one key/value pair, but request had "
+ + operation.size());
+ } else if (operation.containsKey("create")) {
+ create(operation.getJsonObject("create"));
+ } else if (operation.containsKey("update")) {
+ update(operation.getJsonObject("update"));
+ } else if (operation.containsKey("delete")) {
+ delete(operation.getJsonObject("delete"));
} else {
- add(request, entityName, When.Sometime, parser, id);
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "Operation key should be one of 'create', 'update', 'delete', but it was "
+ + operation.keySet());
}
- ev = parser.next(); // end of triple
- count++;
- ev = parser.next(); // either end of input or start of new
- // triple
}
-
+ count = operations.size();
} catch (IOException e) {
throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
}
@@ -195,97 +434,6 @@ public void modify(@Context HttpServletRequest request) throws LuceneException {
}
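
The reworked modify endpoint expects a JSON array of operation objects, each holding exactly one of the keys "create", "update" or "delete". A minimal sketch of building such a body with jakarta.json, assuming the hypothetical class name ModifySketch; the entity name and field values are illustrative only.

    import jakarta.json.Json;
    import jakarta.json.JsonArray;

    public class ModifySketch {
        public static void main(String[] args) {
            // Body of a POST to /modify: an array of single-key operation objects
            JsonArray operations = Json.createArrayBuilder()
                    .add(Json.createObjectBuilder().add("create", Json.createObjectBuilder()
                            .add("_index", "Datafile")
                            .add("doc", Json.createObjectBuilder()
                                    .add("id", 42)
                                    .add("name", "datafile_1.nxs")
                                    .add("fileSize", 1024))))
                    .build();
            System.out.println(operations);
        }
    }
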
- /* if id is not null this is actually an update */
- private void add(HttpServletRequest request, String entityName, When when, JsonParser parser, Long id)
- throws LuceneException, IOException {
-
- IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> createBucket(k));
-
- AttributeName attName = null;
- FieldType fType = null;
- String name = null;
- String value = null;
- Double dvalue = null;
- Store store = Store.NO;
- Document doc = new Document();
-
- parser.next(); // Skip the [
- while (parser.hasNext()) {
- Event ev = parser.next();
- if (ev == Event.KEY_NAME) {
- try {
- attName = AttributeName.valueOf(parser.getString());
- } catch (Exception e) {
- throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
- "Found unknown field type " + e.getMessage());
- }
- } else if (ev == Event.VALUE_STRING) {
- if (attName == AttributeName.type) {
- try {
- fType = FieldType.valueOf(parser.getString());
- } catch (Exception e) {
- throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
- "Found unknown field type " + e.getMessage());
- }
- } else if (attName == AttributeName.name) {
- name = parser.getString();
- } else if (attName == AttributeName.value) {
- value = parser.getString();
- } else {
- throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Bad VALUE_STRING " + attName);
- }
- } else if (ev == Event.VALUE_NUMBER) {
- long num = parser.getLong();
- if (fType == FieldType.SortedDocValuesField) {
- value = Long.toString(num);
- } else if (fType == FieldType.DoubleField) {
- dvalue = parser.getBigDecimal().doubleValue();
- } else {
- throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
- "Bad VALUE_NUMBER " + attName + " " + fType);
- }
- } else if (ev == Event.VALUE_TRUE) {
- if (attName == AttributeName.store) {
- store = Store.YES;
- } else {
- throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Bad VALUE_TRUE " + attName);
- }
- } else if (ev == Event.START_OBJECT) {
- fType = null;
- name = null;
- value = null;
- store = Store.NO;
- } else if (ev == Event.END_OBJECT) {
- if (fType == FieldType.TextField) {
- doc.add(new TextField(name, value, store));
- } else if (fType == FieldType.StringField) {
- doc.add(new StringField(name, value, store));
- } else if (fType == FieldType.SortedDocValuesField) {
- doc.add(new SortedDocValuesField(name, new BytesRef(value)));
- } else if (fType == FieldType.DoubleField) {
- doc.add(new DoubleField(name, dvalue, store));
- }
- } else if (ev == Event.END_ARRAY) {
- if (id == null) {
- if (bucket.locked.get() && when == When.Sometime) {
- throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
- "Lucene locked for " + entityName);
- }
- bucket.indexWriter.addDocument(doc);
- } else {
- if (bucket.locked.get()) {
- throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
- "Lucene locked for " + entityName);
- }
- bucket.indexWriter.updateDocument(new Term("id", id.toString()), doc);
- }
- return;
- } else {
- throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Unexpected token in Json: " + ev);
- }
- }
- }
-
/**
* Expect an array of documents each encoded as an array of things to add to
* the document
@@ -295,22 +443,22 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP
@Path("addNow/{entityName}")
public void addNow(@Context HttpServletRequest request, @PathParam("entityName") String entityName)
throws LuceneException {
+ List<JsonObject> documents;
+ JsonStructure value = null;
logger.debug("Requesting addNow of {}", entityName);
- int count = 0;
- try (JsonParser parser = Json.createParser(request.getInputStream())) {
- Event ev = parser.next(); // Opening [
- while (true) {
- ev = parser.next(); // Final ] or another document
- if (ev == Event.END_ARRAY) {
- break;
- }
- add(request, entityName, When.Now, parser, null);
- count++;
+ try (JsonReader reader = Json.createReader(request.getInputStream())) {
+ value = reader.read();
+ documents = ((JsonArray) value).getValuesAs(JsonObject.class);
+ for (JsonObject document : documents) {
+ createNow(entityName, document);
}
+ } catch (JsonException e) {
+ logger.error("Could not parse JSON from {}", value);
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
} catch (IOException e) {
throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
}
- logger.debug("Added {} {} documents", count, entityName);
+ logger.debug("Added {} {} documents", documents.size(), entityName);
}
/*
@@ -323,11 +471,7 @@ public void clear() throws LuceneException {
logger.info("Requesting clear");
exit();
- timer = new Timer("LuceneCommitTimer");
-
- bucketNum.set(0);
indexBuckets.clear();
- searches.clear();
try {
Files.walk(luceneDirectory, FileVisitOption.FOLLOW_LINKS).sorted(Comparator.reverseOrder())
@@ -336,26 +480,24 @@ public void clear() throws LuceneException {
throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
}
- timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis);
+ initTimer();
logger.info("clear complete - ready to go again");
}
+ /**
+ * Commits any pending documents to their respective index.
+ */
@POST
@Path("commit")
public void commit() throws LuceneException {
- logger.debug("Requesting commit");
+ logger.debug("Requesting commit for {} IndexBuckets", indexBuckets.size());
try {
for (Entry<String, IndexBucket> entry : indexBuckets.entrySet()) {
IndexBucket bucket = entry.getValue();
if (!bucket.locked.get()) {
- int cached = bucket.indexWriter.numRamDocs();
- bucket.indexWriter.commit();
- if (cached != 0) {
- logger.debug("Synch has committed {} {} changes to Lucene - now have {} documents indexed",
- cached, entry.getKey(), bucket.indexWriter.numDocs());
- }
- bucket.searcherManager.maybeRefreshBlocking();
+ logger.trace("{} is unlocked", entry.getKey());
+ bucket.commit("Synch", entry.getKey());
}
}
} catch (IOException e) {
@@ -363,203 +505,328 @@ public void commit() throws LuceneException {
}
}
- private IndexBucket createBucket(String name) {
- try {
- IndexBucket bucket = new IndexBucket();
- FSDirectory directory = FSDirectory.open(luceneDirectory.resolve(name));
- bucket.directory = directory;
- IndexWriterConfig config = new IndexWriterConfig(analyzer);
- IndexWriter iwriter = new IndexWriter(directory, config);
- String[] files = directory.listAll();
- if (files.length == 1 && files[0].equals("write.lock")) {
- logger.debug("Directory only has the write.lock file so store and delete a dummy document");
- Document doc = new Document();
- doc.add(new StringField("dummy", "dummy", Store.NO));
- iwriter.addDocument(doc);
- iwriter.commit();
- iwriter.deleteDocuments(new Term("dummy", "dummy"));
- iwriter.commit();
- logger.debug("Now have " + iwriter.numDocs() + " documents indexed");
+ /**
+ * Creates a new Lucene document, provided that the target index is not locked
+ * for another operation.
+ *
+ * @param operationBody JsonObject containing the "_index" that the new "doc"
+ * should be created in.
+ * @throws NumberFormatException
+ * @throws IOException
+ * @throws LuceneException
+ */
+ private void create(JsonObject operationBody) throws NumberFormatException, IOException, LuceneException {
+ String entityName = operationBody.getString("_index");
+ if (DocumentMapping.relationships.containsKey(entityName)) {
+ updateByRelation(operationBody, false);
+ }
+ if (DocumentMapping.indexedEntities.contains(entityName)) {
+ JsonObject documentObject = operationBody.getJsonObject("doc");
+ Document document = parseDocument(documentObject);
+ logger.trace("create {} {}", entityName, document);
+ IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k));
+ if (bucket.locked.get()) {
+ throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
+ "Lucene locked for " + entityName);
+ }
+ bucket.addDocument(facetsConfig.build(document));
+ // Special case for filesizes
+ if (aggregateFiles && entityName.equals("Datafile")) {
+ JsonNumber jsonFileSize = documentObject.getJsonNumber("fileSize");
+ if (jsonFileSize != null) {
+ JsonNumber datasetId = documentObject.getJsonNumber("dataset.id");
+ JsonNumber investigationId = documentObject.getJsonNumber("investigation.id");
+ aggregateFileSize(jsonFileSize.longValueExact(), 0, 1, datasetId, "dataset");
+ aggregateFileSize(jsonFileSize.longValueExact(), 0, 1, investigationId, "investigation");
+ }
}
- bucket.indexWriter = iwriter;
- bucket.searcherManager = new SearcherManager(iwriter, false, null);
- logger.debug("Bucket for {} is now ready", name);
- return bucket;
- } catch (Throwable e) {
- logger.error("Can't continue " + e.getClass() + " " + e.getMessage());
- return null;
}
}
- @POST
- @Consumes(MediaType.APPLICATION_JSON)
- @Produces(MediaType.APPLICATION_JSON)
- @Path("datafiles")
- public String datafiles(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults)
- throws LuceneException {
-
- Long uid = null;
- try {
- uid = bucketNum.getAndIncrement();
- Search search = new Search();
- searches.put(uid, search);
- Map map = new HashMap<>();
- search.map = map;
-
- try (JsonReader r = Json.createReader(request.getInputStream())) {
- JsonObject o = r.readObject();
- String userName = o.getString("user", null);
-
- BooleanQuery.Builder theQuery = new BooleanQuery.Builder();
-
- if (userName != null) {
- Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id",
- new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"),
- ScoreMode.None);
-
- Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery,
- getSearcher(map, "Investigation"), ScoreMode.None);
-
- Query dsQuery = JoinUtil.createJoinQuery("id", false, "dataset", invQuery,
- getSearcher(map, "Dataset"), ScoreMode.None);
-
- theQuery.add(dsQuery, Occur.MUST);
- }
-
- String text = o.getString("text", null);
- if (text != null) {
- theQuery.add(parser.parse(text, "text"), Occur.MUST);
- }
-
- String lower = o.getString("lower", null);
- String upper = o.getString("upper", null);
- if (lower != null && upper != null) {
- theQuery.add(new TermRangeQuery("date", new BytesRef(lower), new BytesRef(upper), true, true),
- Occur.MUST);
- }
+ /**
+ * Changes the fileSize on an entity by the specified amount. This is used to
+ * aggregate the individual fileSize of Datafiles up to Dataset and
+ * Investigation sizes.
+ *
+ * @param sizeToAdd Increases the fileSize of the entity by this much.
+ * Should be 0 for deletes.
+ * @param sizeToSubtract Decreases the fileSize of the entity by this much.
+ * Should be 0 for creates.
+ * @param deltaFileCount Changes the file count by this much.
+ * @param entityId Icat id of entity to update as a JsonNumber.
+ * @param index Index (entity) to update.
+ * @throws IOException
+ */
+ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFileCount, JsonNumber entityId,
+ String index) throws IOException {
+ if (entityId != null) {
+ aggregateFileSize(sizeToAdd, sizeToSubtract, deltaFileCount, entityId.longValueExact(), index);
+ }
+ }
- if (o.containsKey("params")) {
- JsonArray params = o.getJsonArray("params");
- IndexSearcher datafileParameterSearcher = getSearcher(map, "DatafileParameter");
- for (JsonValue p : params) {
- BooleanQuery.Builder paramQuery = parseParameter(p);
- Query toQuery = JoinUtil.createJoinQuery("datafile", false, "id", paramQuery.build(),
- datafileParameterSearcher, ScoreMode.None);
- theQuery.add(toQuery, Occur.MUST);
+ /**
+ * Changes the fileSize on an entity by the specified amount. This is used to
+ * aggregate the individual fileSize of Datafiles up to Dataset and
+ * Investigation sizes.
+ *
+ * @param sizeToAdd Increases the fileSize of the entity by this much.
+ * Should be 0 for deletes.
+ * @param sizeToSubtract Decreases the fileSize of the entity by this much.
+ * Should be 0 for creates.
+ * @param deltaFileCount Changes the file count by this much.
+ * @param entityId Icat id of entity to update as a long.
+ * @param index Index (entity) to update.
+ * @throws IOException
+ */
+ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFileCount, long entityId,
+ String index) throws IOException {
+ long deltaFileSize = sizeToAdd - sizeToSubtract;
+ if (deltaFileSize != 0 || deltaFileCount != 0) {
+ IndexBucket indexBucket = indexBuckets.computeIfAbsent(index, k -> new IndexBucket(k));
+ for (ShardBucket shardBucket : indexBucket.shardList) {
+ shardBucket.commit();
+ IndexSearcher searcher = shardBucket.searcherManager.acquire();
+ try {
+ Query idQuery = LongPoint.newExactQuery("id", entityId);
+ TopDocs topDocs = searcher.search(idQuery, 1);
+ if (topDocs.totalHits.value == 1) {
+ int docId = topDocs.scoreDocs[0].doc;
+ Document document = searcher.doc(docId);
+ Set<String> prunedFields = new HashSet<>();
+ List<IndexableField> fieldsToAdd = new ArrayList<>();
+
+ incrementFileStatistic("fileSize", deltaFileSize, document, prunedFields, fieldsToAdd);
+ incrementFileStatistic("fileCount", deltaFileCount, document, prunedFields, fieldsToAdd);
+
+ Document newDocument = pruneDocument(prunedFields, document);
+ fieldsToAdd.forEach(field -> newDocument.add(field));
+ shardBucket.indexWriter.deleteDocuments(idQuery);
+ shardBucket.indexWriter.addDocument(facetsConfig.build(newDocument));
+ shardBucket.commit();
+ break;
}
+ } finally {
+ shardBucket.searcherManager.release(searcher);
}
- search.query = maybeEmptyQuery(theQuery);
}
-
- return luceneSearchResult("Datafile", search, maxResults, uid);
- } catch (Exception e) {
- logger.error("Error", e);
- freeSearcher(uid);
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
}
}
- @GET
- @Produces(MediaType.APPLICATION_JSON)
- @Path("datafiles/{uid}")
- public String datafilesAfter(@PathParam("uid") long uid, @QueryParam("maxResults") int maxResults)
- throws LuceneException {
- try {
- Search search = searches.get(uid);
- try {
- return luceneSearchResult("Datafile", search, maxResults, null);
- } catch (Exception e) {
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
- }
- } catch (Exception e) {
- freeSearcher(uid);
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ /**
+ * Increments a field relating to file statistics (count, size) as part of the
+ * update on a Document.
+ *
+ * @param statisticName Name of the field to increment, i.e. fileCount or
+ * fileSize.
+ * @param statisticDelta Change in the value of the named statistic.
+ * @param document Lucene Document containing the old statistic value to
+ * be incremented.
+ * @param prunedFields Set of fields which need to be removed from the old
+ * Document. If the statistic is incremented, this will
+ * have statisticName added to it.
+ * @param fieldsToAdd List of Lucene IndexableFields to add to the new
+ * Document.
+ */
+ private void incrementFileStatistic(String statisticName, long statisticDelta, Document document,
+ Set<String> prunedFields, List<IndexableField> fieldsToAdd) {
+ if (statisticDelta != 0) {
+ prunedFields.add(statisticName);
+ long oldValue = document.getField(statisticName).numericValue().longValue();
+ long newValue = oldValue + statisticDelta;
+ fieldsToAdd.add(new LongPoint(statisticName, newValue));
+ fieldsToAdd.add(new StoredField(statisticName, newValue));
+ fieldsToAdd.add(new NumericDocValuesField(statisticName, newValue));
}
}
+ /**
+ * Creates a new Lucene document.
+ *
+ * @param entityName Name of the entity/index to create the document in.
+ * @param documentJson JsonObject representation of the document to be created.
+ * @throws NumberFormatException
+ * @throws IOException
+ * @throws LuceneException
+ */
+ private void createNow(String entityName, JsonObject documentJson)
+ throws NumberFormatException, IOException, LuceneException {
+ Document document = parseDocument(documentJson);
+ logger.trace("create {} {}", entityName, document);
+ IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k));
+ bucket.addDocument(facetsConfig.build(document));
+ }
+
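For context, facetsConfig.build rewrites any facet fields on a document into the doc-values representation Lucene's faceting expects before the bucket indexes it. A minimal sketch, assuming a string dimension named with the same ".keyword" convention used in the faceting code below; the field name and label are illustrative:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;

// Sketch: build() converts the facet field into indexable doc values.
FacetsConfig facetsConfig = new FacetsConfig();
Document doc = new Document();
doc.add(new StringField("type.name", "raw", Store.YES));
doc.add(new SortedSetDocValuesFacetField("type.name.keyword", "raw"));
Document indexable = facetsConfig.build(doc);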
+ /**
+ * Perform search on the Datafile entity/index.
+ *
+ * @param request Incoming Http request containing the query as Json.
+ * @param searchAfter String of Json representing the last Lucene Document from
+ * a previous search.
+ * @param maxResults The maximum number of results to include in the returned
+ * Json.
+ * @param sort String of Json representing the sort criteria.
+ * @return String of Json representing the results of the search.
+ * @throws LuceneException
+ */
@POST
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
- @Path("datasets")
- public String datasets(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults)
- throws LuceneException {
-
- Long uid = null;
- try {
- uid = bucketNum.getAndIncrement();
- Search search = new Search();
- searches.put(uid, search);
- Map<String, IndexSearcher> map = new HashMap<>();
- search.map = map;
- try (JsonReader r = Json.createReader(request.getInputStream())) {
- JsonObject o = r.readObject();
- String userName = o.getString("user", null);
-
- BooleanQuery.Builder theQuery = new BooleanQuery.Builder();
-
- if (userName != null) {
-
- Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id",
- new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"),
- ScoreMode.None);
+ @Path("datafile")
+ public String datafiles(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter,
+ @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException {
+ return searchEntity(request, searchAfter, maxResults, sort, SearchType.DATAFILE);
+ }
- Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery,
- getSearcher(map, "Investigation"), ScoreMode.None);
+ /**
+ * Perform search on the Dataset entity/index.
+ *
+ * @param request Incoming Http request containing the query as Json.
+ * @param searchAfter String of Json representing the last Lucene Document from
+ * a previous search.
+ * @param maxResults The maximum number of results to include in the returned
+ * Json.
+ * @param sort String of Json representing the sort criteria.
+ * @return String of Json representing the results of the search.
+ * @throws LuceneException
+ */
+ @POST
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ @Path("dataset")
+ public String datasets(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter,
+ @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException {
+ return searchEntity(request, searchAfter, maxResults, sort, SearchType.DATASET);
+ }
- theQuery.add(invQuery, Occur.MUST);
+ /**
+ * Deletes a Lucene document, provided that the target index is not locked for
+ * another operation.
+ *
+ * @param operationBody JsonObject containing the "_index" and the "_id" of the
+ * Document to be deleted.
+ * @throws LuceneException
+ * @throws IOException
+ */
+ private void delete(JsonObject operationBody) throws LuceneException, IOException {
+ String entityName = operationBody.getString("_index");
+ if (DocumentMapping.relationships.containsKey(entityName)) {
+ updateByRelation(operationBody, true);
+ }
+ if (DocumentMapping.indexedEntities.contains(entityName)) {
+ long icatId = operationBody.getJsonNumber("_id").longValueExact();
+ try {
+ IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k));
+ if (bucket.locked.get()) {
+ throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
+ "Lucene locked for " + entityName);
}
-
- String text = o.getString("text", null);
- if (text != null) {
- theQuery.add(parser.parse(text, "text"), Occur.MUST);
+ logger.trace("delete {} {}", entityName, icatId);
+ Query idQuery = LongPoint.newExactQuery("id", icatId);
+ // Special case for filesizes
+ if (aggregateFiles && entityName.equals("Datafile")) {
+ for (ShardBucket shardBucket : bucket.shardList) {
+ IndexSearcher datafileSearcher = shardBucket.searcherManager.acquire();
+ try {
+ TopDocs topDocs = datafileSearcher.search(idQuery, 1);
+ if (topDocs.totalHits.value == 1) {
+ int docId = topDocs.scoreDocs[0].doc;
+ Document datasetDocument = datafileSearcher.doc(docId);
+ long sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue();
+ if (sizeToSubtract > 0) {
+ long datasetId = datasetDocument.getField("dataset.id").numericValue().longValue();
+ long investigationId = datasetDocument.getField("investigation.id").numericValue()
+ .longValue();
+ aggregateFileSize(0, sizeToSubtract, -1, datasetId, "dataset");
+ aggregateFileSize(0, sizeToSubtract, -1, investigationId, "investigation");
+ }
+ break;
+ }
+ } finally {
+ shardBucket.searcherManager.release(datafileSearcher);
+ }
+ }
}
-
- String lower = o.getString("lower", null);
- String upper = o.getString("upper", null);
- if (lower != null && upper != null) {
- theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true),
- Occur.MUST);
- theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true),
- Occur.MUST);
+ for (ShardBucket shardBucket : bucket.shardList) {
+ shardBucket.indexWriter.deleteDocuments(idQuery);
}
+ } catch (IOException e) {
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ }
+ }
+ }
- if (o.containsKey("params")) {
- JsonArray params = o.getJsonArray("params");
- IndexSearcher datasetParameterSearcher = getSearcher(map, "DatasetParameter");
- for (JsonValue p : params) {
- BooleanQuery.Builder paramQuery = parseParameter(p);
- Query toQuery = JoinUtil.createJoinQuery("dataset", false, "id", paramQuery.build(),
- datasetParameterSearcher, ScoreMode.None);
- theQuery.add(toQuery, Occur.MUST);
- }
+ /**
+ * Encodes core Lucene information (keys preceded by underscores) and a
+ * selection of the Document's source fields to JSON to be returned to
+ * icat.server. Note that "_id" is the Lucene Document id, and should not be
+ * confused with the ICAT entity id, which should be denoted by the key "id"
+ * within the "_source" object.
+ *
+ * @param entityName Name of the entity/index the hit belongs to.
+ * @param gen JsonGenerator to encode the information to.
+ * @param hit ScoreDoc representing a single search result.
+ * @param searcher IndexSearcher used to get the Document for the hit.
+ * @param search Search object containing the fields to return.
+ * @throws IOException
+ * @throws LuceneException
+ */
+ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, IndexSearcher searcher,
+ SearchBucket search)
+ throws IOException, LuceneException {
+ int luceneDocId = hit.doc;
+ int shardIndex = hit.shardIndex;
+ Document document = searcher.doc(luceneDocId);
+ gen.writeStartObject().write("_id", luceneDocId).write("_shardIndex", shardIndex);
+ Float score = hit.score;
+ if (!score.equals(Float.NaN)) {
+ gen.write("_score", hit.score);
+ }
+ gen.writeStartObject("_source");
+ document.forEach(encodeField(gen, search.fields));
+ for (String joinedEntityName : search.joinedFields.keySet()) {
+ List<IndexSearcher> searchers = getSearchers(search.searcherMap, joinedEntityName);
+ List<ShardBucket> shards = getShards(joinedEntityName);
+ SearchBucket joinedSearch = new SearchBucket(this);
+ String fld;
+ long parentId;
+ if (joinedEntityName.toLowerCase().contains("investigation")) {
+ fld = "investigation.id";
+ if (entityName.equalsIgnoreCase("investigation")) {
+ parentId = document.getField("id").numericValue().longValue();
+ } else {
+ parentId = document.getField("investigation.id").numericValue().longValue();
}
- search.query = maybeEmptyQuery(theQuery);
+ } else {
+ fld = entityName.toLowerCase() + ".id";
+ parentId = document.getField("id").numericValue().longValue();
}
- return luceneSearchResult("Dataset", search, maxResults, uid);
- } catch (Exception e) {
- logger.error("Error", e);
- freeSearcher(uid);
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ joinedSearch.query = LongPoint.newExactQuery(fld, parentId);
+ joinedSearch.sort = new Sort(new SortedNumericSortField("id", Type.LONG));
+ TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, shards);
+ gen.writeStartArray(joinedEntityName.toLowerCase());
+ for (ScoreDoc joinedHit : topFieldDocs.scoreDocs) {
+ gen.writeStartObject();
+ Document joinedDocument = searchers.get(joinedHit.shardIndex).doc(joinedHit.doc);
+ joinedDocument.forEach(encodeField(gen, search.joinedFields.get(joinedEntityName)));
+ gen.writeEnd();
+ }
+ gen.writeEnd();
}
-
+ gen.writeEnd().writeEnd(); // source object, result object
}
- @GET
- @Produces(MediaType.APPLICATION_JSON)
- @Path("datasets/{uid}")
- public String datasetsAfter(@PathParam("uid") long uid, @QueryParam("maxResults") int maxResults)
- throws LuceneException {
- try {
- Search search = searches.get(uid);
- try {
- return luceneSearchResult("Dataset", search, maxResults, null);
- } catch (Exception e) {
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ private Consumer<? super IndexableField> encodeField(JsonGenerator gen, Set<String> fields) {
+ return (field) -> {
+ String fieldName = field.name();
+ if (fields.contains(fieldName)) {
+ if (DocumentMapping.longFields.contains(fieldName)) {
+ gen.write(fieldName, field.numericValue().longValue());
+ } else if (DocumentMapping.doubleFields.contains(fieldName)) {
+ gen.write(fieldName, field.numericValue().doubleValue());
+ } else {
+ gen.write(fieldName, field.stringValue());
+ }
}
- } catch (Exception e) {
- freeSearcher(uid);
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
- }
+ };
}
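The result encoding above streams JSON with the jakarta.json generator rather than building intermediate objects. A minimal sketch of that pattern, with illustrative field names:

import java.io.ByteArrayOutputStream;
import jakarta.json.Json;
import jakarta.json.stream.JsonGenerator;

// Sketch: stream a result object directly to an output buffer.
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (JsonGenerator gen = Json.createGenerator(baos)) {
    gen.writeStartObject();
    gen.writeStartObject("_source");
    gen.write("id", 123L);      // long field
    gen.write("name", "df_1");  // string field
    gen.writeEnd();             // "_source"
    gen.writeEnd();             // root object
}
String json = baos.toString(); // {"_source":{"id":123,"name":"df_1"}}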
@PreDestroy
@@ -571,12 +838,8 @@ private void exit() {
timer = null; // This seems to be necessary to make it really stop
}
try {
- for (Entry<String, IndexBucket> entry : indexBuckets.entrySet()) {
- IndexBucket bucket = entry.getValue();
- bucket.searcherManager.close();
- bucket.indexWriter.commit();
- bucket.indexWriter.close();
- bucket.directory.close();
+ for (IndexBucket bucket : indexBuckets.values()) {
+ bucket.close();
}
logger.info("Closed down icat.lucene");
} catch (Exception e) {
@@ -584,43 +847,123 @@ private void exit() {
}
}
- @DELETE
- @Path("freeSearcher/{uid}")
- public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException {
- if (uid != null) { // May not be set for internal calls
- logger.debug("Requesting freeSearcher {}", uid);
- Map<String, IndexSearcher> search = searches.get(uid).map;
- for (Entry<String, IndexSearcher> entry : search.entrySet()) {
+ /**
+ * Perform faceting on an entity/index. The query associated with the request
+ * should determine which Documents to consider, and optionally the dimensions
+ * to facet. If no dimensions are provided, "sparse" faceting is performed
+ * across relevant string fields (but no Range faceting occurs).
+ *
+ * @param entityName Name of the entity/index to facet on.
+ * @param request Incoming Http request containing the query as Json.
+ * @param searchAfter String of Json representing the last Lucene Document from
+ * a previous search.
+ * @param maxResults The maximum number of results to include in the returned
+ * Json.
+ * @param maxLabels The maximum number of labels to return for each dimension
+ * of the facets.
+ * @param sort String of Json representing the sort criteria.
+ * @return String of Json representing the results of the faceting.
+ * @throws LuceneException
+ */
+ @POST
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ @Path("{entityName}/facet")
+ public String facet(@PathParam("entityName") String entityName, @Context HttpServletRequest request,
+ @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults,
+ @QueryParam("maxLabels") int maxLabels, @QueryParam("sort") String sort) throws LuceneException {
+ SearchBucket search = null;
+ try {
+ search = new SearchBucket(this, SearchType.GENERIC, request, sort, null);
+ return luceneFacetResult(entityName, search, searchAfter, maxResults, maxLabels);
+ } catch (IOException | QueryNodeException e) {
+ logger.error("Error", e);
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ } finally {
+ freeSearcher(search);
+ }
+ }
+
+ /**
+ * Releases all IndexSearchers associated with a SearchBucket.
+ *
+ * @param search SearchBucket to be freed.
+ * @throws LuceneException
+ */
+ public void freeSearcher(SearchBucket search) throws LuceneException {
+ if (search != null) {
+ for (Entry<String, List<IndexSearcher>> entry : search.searcherMap.entrySet()) {
String name = entry.getKey();
- IndexSearcher isearcher = entry.getValue();
- SearcherManager manager = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).searcherManager;
+ List<IndexSearcher> subReaders = entry.getValue();
try {
- manager.release(isearcher);
+ indexBuckets.computeIfAbsent(name.toLowerCase(), k -> new IndexBucket(k))
+ .releaseSearchers(subReaders);
} catch (IOException e) {
throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
}
}
- searches.remove(uid);
}
}
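Releasing searchers matters because every acquire on a SearcherManager must be balanced by a release before the underlying reader can be refreshed or closed. A minimal sketch of that discipline, assuming a SearcherManager is already open:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.SearcherManager;

// Sketch: always release in a finally block, mirroring the pattern above.
static long countAll(SearcherManager manager) throws IOException {
    IndexSearcher searcher = manager.acquire();
    try {
        return searcher.search(new MatchAllDocsQuery(), 1).totalHits.value;
    } finally {
        manager.release(searcher);
    }
}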
- /*
- * Need a new set of IndexSearchers for each search as identified by a uid
+ /**
+ * Gets all IndexSearchers needed for the shards of a given entity/index.
+ *
+ * @param searcherMap Map of entity names to their IndexSearchers.
+ * @param name Name of the entity to get the IndexSearchers for.
+ * @return List of IndexSearchers for name.
+ * @throws IOException
+ */
+ private List<IndexSearcher> getSearchers(Map<String, List<IndexSearcher>> searcherMap, String name)
+ throws IOException {
+ String nameLowercase = name.toLowerCase();
+ logger.trace("Get searchers for {}", nameLowercase);
+ List<IndexSearcher> subSearchers = searcherMap.get(nameLowercase);
+ if (subSearchers == null) {
+ logger.trace("No searchers found for {}", nameLowercase);
+ subSearchers = indexBuckets.computeIfAbsent(nameLowercase, k -> new IndexBucket(k)).acquireSearchers();
+ searcherMap.put(nameLowercase, subSearchers);
+ logger.debug("Remember searcher for {}", nameLowercase);
+ }
+ return subSearchers;
+ }
+
+ /**
+ * Gets a single IndexSearcher for name. When multiple shards are possible,
+ * getSearchers should be used instead.
+ *
+ * @param searcherMap Map of entity names to their IndexSearchers.
+ * @param name Name of the entity to get the IndexSearcher for.
+ * @return The IndexSearcher for name.
+ * @throws IOException
+ * @throws LuceneException If there are more than one shard for name.
*/
- private IndexSearcher getSearcher(Map<String, IndexSearcher> bucket, String name) throws IOException {
- IndexSearcher isearcher = bucket.get(name);
- if (isearcher == null) {
- isearcher = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).searcherManager.acquire();
- bucket.put(name, isearcher);
- logger.debug("Remember searcher for {}", name);
+ public IndexSearcher getSearcher(Map<String, List<IndexSearcher>> searcherMap, String name)
+ throws IOException, LuceneException {
+ List<IndexSearcher> subSearchers = searcherMap.get(name);
+ subSearchers = getSearchers(searcherMap, name);
+ if (subSearchers.size() != 1) {
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR,
+ "Cannot get single IndexSearcher for " + name + " as it has " + subSearchers.size() + " shards");
}
- return isearcher;
+ return subSearchers.get(0);
+ }
+
+ /**
+ * Gets all ShardBuckets of a given entity/index.
+ *
+ * @param name Name of the entity to get the ShardBuckets for.
+ * @return List of ShardBuckets for name.
+ */
+ private List<ShardBucket> getShards(String name) {
+ return indexBuckets.computeIfAbsent(name.toLowerCase(), k -> new IndexBucket(k)).shardList;
}
@PostConstruct
private void init() {
logger.info("Initialising icat.lucene");
CheckedProperties props = new CheckedProperties();
+ String unitsString;
+ int commitSeconds;
try {
props.loadFromResource("run.properties");
@@ -629,24 +972,39 @@ private void init() {
throw new Exception(luceneDirectory + " is not a directory");
}
- luceneCommitMillis = props.getPositiveInt("commitSeconds") * 1000;
-
- analyzer = new IcatAnalyzer();
+ commitSeconds = props.getPositiveInt("commitSeconds");
+ luceneCommitMillis = commitSeconds * 1000;
+ luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), Long.valueOf(Integer.MAX_VALUE + 1));
+ maxSearchTimeSeconds = props.has("maxSearchTimeSeconds") ? props.getPositiveLong("maxSearchTimeSeconds")
+ : 5;
+ aggregateFiles = props.getBoolean("aggregateFiles", false);
- parser = new StandardQueryParser();
- StandardQueryConfigHandler qpConf = (StandardQueryConfigHandler) parser.getQueryConfigHandler();
- qpConf.set(ConfigurationKeys.ANALYZER, analyzer);
- qpConf.set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, true);
+ initTimer();
- timer = new Timer("LuceneCommitTimer");
- timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis);
+ unitsString = props.getString("units", "");
+ icatUnits = new IcatUnits(unitsString);
+ String facetFieldsString = props.getString("facetFields", "");
+ for (String facetField : facetFieldsString.split("\\s+")) {
+ facetFields.add(facetField);
+ }
} catch (Exception e) {
logger.error(fatal, e.getMessage());
throw new IllegalStateException(e.getMessage());
}
- logger.info("Initialised icat.lucene");
+ String format = "Initialised icat.lucene with directory {}, commitSeconds {}, maxShardSize {}, "
+ + "maxSearchTimeSeconds {}, aggregateFiles {}, units {}, facetFields {}";
+ logger.info(format, luceneDirectory, commitSeconds, luceneMaxShardSize, maxSearchTimeSeconds,
+ aggregateFiles, unitsString, facetFields);
+ }
+
+ /**
+ * Starts a timer and schedules regular commits of the IndexWriter.
+ */
+ private void initTimer() {
+ timer = new Timer("LuceneCommitTimer");
+ timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis);
}
class CommitTimerTask extends TimerTask {
@@ -660,220 +1018,762 @@ public void run() {
}
}
+ /**
+ * Perform search on the Investigation entity/index.
+ *
+ * @param request Incoming Http request containing the query as Json.
+ * @param searchAfter String of Json representing the last Lucene Document from
+ * a previous search.
+ * @param maxResults The maximum number of results to include in the returned
+ * Json.
+ * @param sort String of Json representing the sort criteria.
+ * @return String of Json representing the results of the search.
+ * @throws LuceneException
+ */
@POST
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
- @Path("investigations")
- public String investigations(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults)
- throws LuceneException {
- Long uid = null;
- try {
- uid = bucketNum.getAndIncrement();
- Search search = new Search();
- searches.put(uid, search);
- Map<String, IndexSearcher> map = new HashMap<>();
- search.map = map;
- try (JsonReader r = Json.createReader(request.getInputStream())) {
- JsonObject o = r.readObject();
- String userName = o.getString("user", null);
-
- BooleanQuery.Builder theQuery = new BooleanQuery.Builder();
-
- if (userName != null) {
- Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id",
- new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"),
- ScoreMode.None);
- theQuery.add(iuQuery, Occur.MUST);
- }
+ @Path("investigation")
+ public String investigations(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter,
+ @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException {
+ return searchEntity(request, searchAfter, maxResults, sort, SearchType.INVESTIGATION);
+ }
- String text = o.getString("text", null);
- if (text != null) {
- theQuery.add(parser.parse(text, "text"), Occur.MUST);
- }
+ /**
+ * Locks the specified index for population, optionally removing all existing
+ * documents and preventing normal modify operations until the index is
+ * unlocked.
+ *
+ * A check is also performed against the minId and maxId used for population.
+ * This ensures that no data is duplicated in the index.
+ *
+ * @param entityName Name of the entity/index to lock.
+ * @param minId The exclusive minimum ICAT id being populated for. If
+ * Documents already exist with an id greater than this, the
+ * lock will fail. If null, treated as if it were
+ * Long.MIN_VALUE
+ * @param maxId The inclusive maximum ICAT id being populated for. If
+ * Documents already exist with an id less than or equal to
+ * this, the lock will fail. If null, treated as if it were
+ * Long.MAX_VALUE
+ * @param delete Whether to delete all existing Documents on the index.
+ * @throws LuceneException If already locked, if there's an IOException when
+ * deleting documents, or if the min/max id values are
+ * provided and Documents already exist in that range.
+ */
+ @POST
+ @Path("lock/{entityName}")
+ public void lock(@PathParam("entityName") String entityName, @QueryParam("minId") Long minId,
+ @QueryParam("maxId") Long maxId, @QueryParam("delete") boolean delete) throws LuceneException {
+ try {
+ logger.info("Requesting lock of {} index, minId={}, maxId={}, delete={}", entityName, minId, maxId, delete);
+ IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k));
- String lower = o.getString("lower", null);
- String upper = o.getString("upper", null);
- if (lower != null && upper != null) {
- theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true),
- Occur.MUST);
- theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true),
- Occur.MUST);
+ if (!bucket.locked.compareAndSet(false, true)) {
+ String message = "Lucene already locked for " + entityName;
+ throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, message);
+ }
+ if (delete) {
+ for (ShardBucket shardBucket : bucket.shardList) {
+ shardBucket.indexWriter.deleteAll();
}
+ // Reset the shardList so we reset the routing
+ ShardBucket shardBucket = bucket.shardList.get(0);
+ bucket.shardList = new ArrayList<>();
+ bucket.shardList.add(shardBucket);
+ return;
+ }
- if (o.containsKey("params")) {
- JsonArray params = o.getJsonArray("params");
- IndexSearcher investigationParameterSearcher = getSearcher(map, "InvestigationParameter");
-
- for (JsonValue p : params) {
- BooleanQuery.Builder paramQuery = parseParameter(p);
- Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", paramQuery.build(),
- investigationParameterSearcher, ScoreMode.None);
- theQuery.add(toQuery, Occur.MUST);
+ for (ShardBucket shardBucket : bucket.shardList) {
+ IndexSearcher searcher = shardBucket.searcherManager.acquire();
+ try {
+ Query query;
+ if (minId == null && maxId == null) {
+ query = new MatchAllDocsQuery();
+ } else {
+ if (minId == null) {
+ minId = Long.MIN_VALUE;
+ }
+ if (maxId == null) {
+ maxId = Long.MAX_VALUE;
+ }
+ query = LongPoint.newRangeQuery("id", minId + 1, maxId);
+ }
+ TopDocs topDoc = searcher.search(query, 1);
+ if (topDoc.scoreDocs.length != 0) {
+ // If we have any results in the populating range, unlock and throw
+ bucket.locked.compareAndSet(true, false);
+ Document doc = searcher.doc(topDoc.scoreDocs[0].doc);
+ long id = doc.getField("id").numericValue().longValue();
+ String message = "While locking index, found id " + id + " in specified range";
+ logger.error(message);
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message);
}
+ } finally {
+ shardBucket.searcherManager.release(searcher);
}
+ }
+ } catch (IOException e) {
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ }
+ }
- if (o.containsKey("samples")) {
- JsonArray samples = o.getJsonArray("samples");
- IndexSearcher sampleSearcher = getSearcher(map, "Sample");
-
- for (JsonValue s : samples) {
- JsonString sample = (JsonString) s;
- BooleanQuery.Builder sampleQuery = new BooleanQuery.Builder();
- sampleQuery.add(parser.parse(sample.getString(), "text"), Occur.MUST);
- Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", sampleQuery.build(),
- sampleSearcher, ScoreMode.None);
- theQuery.add(toQuery, Occur.MUST);
+ /**
+ * Perform faceting on an entity/index.
+ *
+ * @param name Entity/index to facet.
+ * @param search SearchBucket containing the search query, dimensions to
+ * facet etc.
+ * @param searchAfter String of Json representing the last Lucene Document from
+ * a previous search.
+ * @param maxResults The maximum number of results from the search.
+ * @param maxLabels The maximum number of labels to return for each dimension
+ * of the facets.
+ * @return String of Json representing the facets of the search results.
+ * @throws IOException
+ * @throws IllegalStateException If the IndexSearcher and its DirectoryReader
+ * are not in sync.
+ * @throws LuceneException If ranges are provided for a non-numeric field,
+ * or something else goes wrong.
+ */
+ private String luceneFacetResult(String name, SearchBucket search, String searchAfter, int maxResults,
+ int maxLabels) throws IOException, IllegalStateException, LuceneException {
+ // If no dimensions were specified, perform "sparse" faceting on all applicable
+ // string values
+ boolean sparse = search.dimensions.size() == 0;
+ // By default, assume we do not need to perform string based faceting for
+ // specific dimensions
+ boolean facetStrings = false;
+ if (maxResults <= 0 || maxLabels <= 0) {
+ // This will result in no Facets and a null pointer, so return early
+ logger.warn("Cannot facet when maxResults={}, maxLabels={}, returning empty list", maxResults, maxLabels);
+ } else {
+ // Iterate over shards and aggregate the facets from each
+ logger.debug("Faceting {} with {} after {} ", name, search.query, searchAfter);
+ List<ShardBucket> shards = getShards(name);
+ for (ShardBucket shard : shards) {
+ FacetsCollector facetsCollector = new FacetsCollector();
+ IndexSearcher indexSearcher = shard.searcherManager.acquire();
+ try {
+ TopDocs results = FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector);
+ logger.debug("{}", results.totalHits);
+ for (FacetedDimension facetedDimension : search.dimensions.values()) {
+ facetStrings = facetRanges(maxLabels, facetStrings, facetsCollector, facetedDimension);
+ }
+ if (shard.state == null) {
+ logger.debug("State not set, this is most likely due to not having any facetable fields");
+ continue;
+ } else if (shard.state.reader != indexSearcher.getIndexReader()) {
+ logger.warn("Attempted search with outdated state, create new one from current IndexReader");
+ shard.state = new DefaultSortedSetDocValuesReaderState(indexSearcher.getIndexReader());
}
+ facetStrings(search, maxLabels, sparse, facetStrings, indexSearcher, facetsCollector, shard.state);
+ } finally {
+ shard.searcherManager.release(indexSearcher);
}
+ }
+ }
+ // Build results
+ JsonObjectBuilder aggregationsBuilder = Json.createObjectBuilder();
+ search.dimensions.values().forEach(facetedDimension -> facetedDimension.buildResponse(aggregationsBuilder));
+ String aggregations = Json.createObjectBuilder().add("aggregations", aggregationsBuilder).build().toString();
+ logger.debug("aggregations: {}", aggregations);
+ return aggregations;
+ }
- String userFullName = o.getString("userFullName", null);
- if (userFullName != null) {
- BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder();
- userFullNameQuery.add(parser.parse(userFullName, "text"), Occur.MUST);
- IndexSearcher investigationUserSearcher = getSearcher(map, "InvestigationUser");
- Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", userFullNameQuery.build(),
- investigationUserSearcher, ScoreMode.None);
- theQuery.add(toQuery, Occur.MUST);
- }
+ /**
+ * Performs range based faceting on the provided facetedDimension, if possible.
+ *
+ * @param maxLabels The maximum number of labels to collect for each
+ * facet
+ * @param facetStrings Whether there are String dimensions that will need
+ * faceting later
+ * @param facetsCollector Lucene FacetsCollector used to count results
+ * @param facetedDimension Representation of the dimension to facet, and used to
+ * store the results of the faceting
+ * @return If a string dimension was encountered, returns true. Otherwise,
+ * returns the value of facetStrings originally passed.
+ * @throws IOException
+ * @throws LuceneException
+ */
+ private boolean facetRanges(int maxLabels, boolean facetStrings, FacetsCollector facetsCollector,
+ FacetedDimension facetedDimension) throws IOException, LuceneException {
+ if (facetedDimension.getRanges().size() > 0) {
+ logger.debug("Ranges: {}", facetedDimension.getRanges().get(0).getClass().getSimpleName());
+ // Perform range based facets for a numeric field
+ String dimension = facetedDimension.getDimension();
+ Facets facets;
+ if (DocumentMapping.longFields.contains(dimension)) {
+ LongRange[] ranges = facetedDimension.getRanges().toArray(new LongRange[0]);
+ facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges);
+ } else if (DocumentMapping.doubleFields.contains(dimension)) {
+ DoubleRange[] ranges = facetedDimension.getRanges().toArray(new DoubleRange[0]);
+ facets = new DoubleRangeFacetCounts(dimension, facetsCollector, ranges);
+ } else {
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "'ranges' specified for dimension " + dimension
+ + " but this is not a supported numeric field");
+ }
+ FacetResult facetResult = facets.getTopChildren(maxLabels, dimension);
+ facetedDimension.addResult(facetResult);
+ } else {
+ // Have a specific string dimension to facet, but these should all be done at
+ // once for efficiency
+ facetStrings = true;
+ }
+ return facetStrings;
+ }
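Range faceting counts hits per caller-supplied bucket rather than per distinct value. A minimal sketch for a long field, with an illustrative field name and ranges that are not taken from the PR:

import java.io.IOException;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.facet.range.LongRangeFacetCounts;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;

// Sketch: count matches of a query in two labelled "fileSize" buckets.
static FacetResult fileSizeRanges(IndexSearcher searcher) throws IOException {
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
    LongRange[] ranges = {
            new LongRange("small", 0, true, 1024, false),
            new LongRange("large", 1024, true, Long.MAX_VALUE, true) };
    Facets facets = new LongRangeFacetCounts("fileSize", fc, ranges);
    return facets.getTopChildren(10, "fileSize");
}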
- search.query = maybeEmptyQuery(theQuery);
+ /**
+ * Performs String based faceting. Either this will be sparse (all fields
+ * targeted) or it will occur for specific fields only.
+ *
+ * @param search Bucket being used for this search
+ * @param maxLabels The maximum number of labels to collect for each facet
+ * @param sparse Whether to perform sparse faceting (faceting across
+ * all String fields)
+ * @param facetStrings Whether specific String dimensions should be faceted
+ * @param indexSearcher Lucene IndexSearcher used to generate the ReaderState
+ * @param facetsCollector Lucene FacetsCollector used to count results
+ * @param state Lucene State used to count results
+ * @throws IOException
+ */
+ private void facetStrings(SearchBucket search, int maxLabels, boolean sparse, boolean facetStrings,
+ IndexSearcher indexSearcher, FacetsCollector facetsCollector, DefaultSortedSetDocValuesReaderState state)
+ throws IOException {
+ try {
+ logger.trace("String faceting");
+ Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector);
+ if (sparse) {
+ // Facet all applicable string fields
+ addFacetResults(maxLabels, search.dimensions, facets);
+ logger.trace("Sparse string faceting found results for {} dimensions", search.dimensions.size());
+ } else if (facetStrings) {
+ // Only add facets to the results if they match one of the requested dimensions
+ List<FacetResult> facetResults = facets.getAllDims(maxLabels);
+ for (FacetResult facetResult : facetResults) {
+ String dimension = facetResult.dim.replace(".keyword", "");
+ FacetedDimension facetedDimension = search.dimensions.get(dimension);
+ logger.trace("String facets found for {}, requested dimensions were {}", dimension,
+ search.dimensions.keySet());
+ if (facetedDimension != null) {
+ facetedDimension.addResult(facetResult);
+ }
+ }
}
- logger.info("Query: {}", search.query);
- return luceneSearchResult("Investigation", search, maxResults, uid);
- } catch (Exception e) {
- logger.error("Error", e);
- freeSearcher(uid);
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ } catch (IllegalArgumentException e) {
+ // This can occur if no fields in the index have been faceted
+ logger.error(
+ "No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage());
+ } catch (IllegalStateException e) {
+ // This can occur if we do not create the IndexSearcher from the same
+ // DirectoryReader as we used to create the state
+ logger.error("IndexSearcher used is not based on the DirectoryReader used for facet counting: "
+ + e.getClass() + " " + e.getMessage());
+ throw e;
}
+ }
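The IllegalStateException guard exists because SortedSetDocValues faceting requires the reader state and the searcher to wrap the same DirectoryReader. A minimal sketch of the compatible setup, assuming an already-populated Directory:

import java.io.IOException;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;

// Sketch: state and searcher are built from the same reader, so the
// counts constructor does not reject them.
static Facets stringFacets(Directory dir) throws IOException {
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(reader);
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
    return new SortedSetDocValuesFacetCounts(state, fc);
}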
+ /**
+ * Add Facets for all dimensions. This will create FacetedDimension objects if
+ * they do not already exist in the facetedDimensionMap, otherwise the counts for
+ * each label will be aggregated.
+ *
+ * @param maxLabels The maximum number of labels for a given
+ * dimension. The labels with the highest counts are
+ * returned first.
+ * @param facetedDimensionMap Map containing the dimensions that have been or
+ * should be faceted.
+ * @param facets Lucene facets object containing all dimensions.
+ * @throws IOException
+ */
+ private void addFacetResults(int maxLabels, Map<String, FacetedDimension> facetedDimensionMap, Facets facets)
+ throws IOException {
+ for (FacetResult facetResult : facets.getAllDims(maxLabels)) {
+ String dim = facetResult.dim.replace(".keyword", "");
+ logger.trace("Sparse faceting: FacetResult for {}", dim);
+ FacetedDimension facetedDimension = facetedDimensionMap.get(dim);
+ if (facetedDimension == null) {
+ facetedDimension = new FacetedDimension(dim);
+ facetedDimensionMap.put(dim, facetedDimension);
+ }
+ facetedDimension.addResult(facetResult);
+ }
}
- @GET
- @Produces(MediaType.APPLICATION_JSON)
- @Path("investigations/{uid}")
- public String investigationsAfter(@PathParam("uid") long uid, @QueryParam("maxResults") int maxResults)
- throws LuceneException {
+ /**
+ * Perform search on the specified entity/index.
+ *
+ * @param request Incoming Http request containing the query as Json.
+ * @param searchAfter String of Json representing the last Lucene Document from
+ * a previous search.
+ * @param maxResults The maximum number of results to include in the returned
+ * Json.
+ * @param sort String of Json representing the sort criteria.
+ * @param searchType The type of search query to build, corresponding to one of
+ * the main entities.
+ * @return String of Json representing the results of the search.
+ * @throws LuceneException
+ */
+ private String searchEntity(HttpServletRequest request, String searchAfter, int maxResults, String sort,
+ SearchType searchType) throws LuceneException {
+ SearchBucket search = null;
try {
- Search search = searches.get(uid);
- try {
- return luceneSearchResult("Investigation", search, maxResults, null);
- } catch (Exception e) {
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
- }
- } catch (Exception e) {
- freeSearcher(uid);
+ search = new SearchBucket(this, searchType, request, sort, searchAfter);
+ return luceneSearchResult(searchType.toString(), search, searchAfter, maxResults);
+ } catch (IOException | QueryNodeException e) {
+ logger.error("Error", e);
throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ } finally {
+ freeSearcher(search);
}
}
- @POST
- @Path("lock/{entityName}")
- public void lock(@PathParam("entityName") String entityName) throws LuceneException {
- logger.info("Requesting lock of {} index", entityName);
- IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> createBucket(k));
-
- if (!bucket.locked.compareAndSet(false, true)) {
- throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene already locked for " + entityName);
+ /**
+ * Perform search on name.
+ *
+ * @param name Entity/index to search.
+ * @param search SearchBucket containing the search query, dimensions to
+ * facet etc.
+ * @param searchAfter String of Json representing the last Lucene Document from
+ * a previous search.
+ * @param maxResults The maximum number of results from the search.
+ * @return String of Json representing the results of the search.
+ * @throws IOException
+ * @throws LuceneException
+ */
+ private String luceneSearchResult(String name, SearchBucket search, String searchAfter, int maxResults)
+ throws IOException, LuceneException {
+ List<IndexSearcher> searchers = getSearchers(search.searcherMap, name);
+ List<ShardBucket> shards = getShards(name);
+ String format = "Search {} with: query {}, maxResults {}, searchAfter {}, scored {}, fields {}";
+ logger.debug(format, name, search.query, maxResults, searchAfter, search.scored, search.fields);
+ TopFieldDocs topFieldDocs = searchShards(search, maxResults, shards);
+ ScoreDoc[] hits = topFieldDocs.scoreDocs;
+ TotalHits totalHits = topFieldDocs.totalHits;
+ SortField[] fields = topFieldDocs.fields;
+ Float maxScore = Float.NaN;
+ if (hits.length > 0) {
+ maxScore = hits[0].score;
}
+ logger.debug("{} maxscore {}", totalHits, maxScore);
+ return encodeResults(name, search, maxResults, searchers, hits, fields);
+ }
+
+ /**
+ * Performs a search by iterating over all relevant shards.
+ *
+ * @param search SearchBucket containing the search query, dimensions to
+ * facet etc.
+ * @param maxResults The maximum number of results from the search.
+ * @param shards List of all ShardBuckets for the entity to be searched.
+ * @return Lucene TopFieldDocs resulting from the search.
+ * @throws IOException
+ * @throws LuceneException If the search runs for longer than the allowed time
+ */
+ private TopFieldDocs searchShards(SearchBucket search, int maxResults, List<ShardBucket> shards)
+ throws IOException, LuceneException {
+
+ TopFieldDocs topFieldDocs;
+ Counter clock = TimeLimitingCollector.getGlobalCounter();
+ TimeLimitingCollector collector = new TimeLimitingCollector(null, clock, maxSearchTimeSeconds * 1000);
+
try {
- bucket.indexWriter.deleteAll();
- } catch (IOException e) {
- throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
+ List<TopFieldDocs> shardHits = new ArrayList<>();
+ int doc = search.searchAfter != null ? search.searchAfter.doc : -1;
+ for (ShardBucket shard : shards) {
+ // Handle the possibility of some shards having a higher docCount than the doc
+ // id on searchAfter
+ int docCount = shard.documentCount.intValue();
+ if (search.searchAfter != null) {
+ if (doc > docCount) {
+ search.searchAfter.doc = docCount - 1;
+ } else {
+ search.searchAfter.doc = doc;
+ }
+ }
+
+ // Wrap Collector with TimeLimitingCollector
+ TopFieldCollector topFieldCollector = TopFieldCollector.create(search.sort, maxResults,
+ search.searchAfter, maxResults);
+ collector.setCollector(topFieldCollector);
+
+ IndexSearcher indexSearcher = shard.searcherManager.acquire();
+ try {
+ indexSearcher.search(search.query, collector);
+ TopFieldDocs topDocs = topFieldCollector.topDocs();
+ if (search.scored) {
+ TopFieldCollector.populateScores(topDocs.scoreDocs, indexSearcher, search.query);
+ }
+ shardHits.add(topDocs);
+ } finally {
+ shard.searcherManager.release(indexSearcher);
+ }
+ }
+ topFieldDocs = TopFieldDocs.merge(search.sort, 0, maxResults, shardHits.toArray(new TopFieldDocs[0]),
+ true);
+
+ return topFieldDocs;
+
+ } catch (TimeExceededException e) {
+ String message = "Search cancelled for exceeding " + maxSearchTimeSeconds + " seconds";
+ throw new LuceneException(HttpURLConnection.HTTP_GATEWAY_TIMEOUT, message);
}
}
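The time limit works by wrapping the real collector: TimeLimitingCollector delegates to it until the shared clock passes the budget, then aborts collection with TimeExceededException. A minimal sketch of that wrapping, with an illustrative five-second budget:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.util.Counter;

// Sketch: wrap a TopFieldCollector so collection aborts after ~5 seconds,
// throwing TimeLimitingCollector.TimeExceededException.
static TopFieldDocs timedSearch(IndexSearcher searcher) throws IOException {
    Counter clock = TimeLimitingCollector.getGlobalCounter();
    TimeLimitingCollector collector = new TimeLimitingCollector(null, clock, 5 * 1000);
    TopFieldCollector topFieldCollector = TopFieldCollector.create(Sort.RELEVANCE, 10, null, 10);
    collector.setCollector(topFieldCollector);
    searcher.search(new MatchAllDocsQuery(), collector);
    return topFieldCollector.topDocs();
}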
- private String luceneSearchResult(String name, Search search, int maxResults, Long uid) throws IOException {
- IndexSearcher isearcher = getSearcher(search.map, name);
- logger.debug("To search in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher,
- search.lastDoc);
- TopDocs topDocs = search.lastDoc == null ? isearcher.search(search.query, maxResults)
- : isearcher.searchAfter(search.lastDoc, search.query, maxResults);
- ScoreDoc[] hits = topDocs.scoreDocs;
- logger.debug("Hits " + topDocs.totalHits + " maxscore " + topDocs.getMaxScore());
+ /**
+ * Encodes the results of a search into Json.
+ *
+ * @param name Entity/index that has been searched search
+ * @param search SearchBucket containing the search query, dimensions to
+ * facet etc.
+ * @param maxResults The maximum number of results from the search
+ * @param searchers List of IndexSearchers for the given name
+ * @param hits Array of the scored hits from the search
+ * @param fields SortFields that were used to sort the hits
+ * @return String of Json encoded results
+ * @throws IOException
+ * @throws LuceneException
+ */
+ private String encodeResults(String name, SearchBucket search, int maxResults, List<IndexSearcher> searchers,
+ ScoreDoc[] hits, SortField[] fields) throws IOException, LuceneException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int shardIndex = -1;
try (JsonGenerator gen = Json.createGenerator(baos)) {
gen.writeStartObject();
- if (uid != null) {
- gen.write("uid", uid);
- }
gen.writeStartArray("results");
for (ScoreDoc hit : hits) {
- Document doc = isearcher.doc(hit.doc);
- gen.writeStartArray();
- gen.write(Long.parseLong(doc.get("id")));
- gen.write(hit.score);
- gen.writeEnd(); // array
+ shardIndex = hit.shardIndex;
+ encodeResult(name, gen, hit, searchers.get(shardIndex), search);
}
gen.writeEnd(); // array results
- gen.writeEnd(); // object
+ if (hits.length == maxResults) {
+ ScoreDoc lastDoc = hits[hits.length - 1];
+ shardIndex = lastDoc.shardIndex;
+ gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", shardIndex);
+ float lastScore = lastDoc.score;
+ if (!Float.isNaN(lastScore)) {
+ gen.write("score", lastScore);
+ }
+ if (fields != null) {
+ Document lastDocument = searchers.get(shardIndex).doc(lastDoc.doc);
+ gen.writeStartArray("fields");
+ for (SortField sortField : fields) {
+ encodeSearchAfterField(gen, sortField, lastDoc, lastDocument);
+ }
+ gen.writeEnd(); // end "fields" array
+ }
+ gen.writeEnd(); // end "search_after" object
+ }
+ gen.writeEnd(); // end enclosing object
+ } catch (ArrayIndexOutOfBoundsException e) {
+ String message = "Attempting to access searcher with shardIndex " + shardIndex + ", but only have "
+ + searchers.size() + " searchers in total";
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, message);
}
-
- search.lastDoc = hits.length == 0 ? null : hits[hits.length - 1];
- logger.debug("Json returned {}", baos.toString());
+ logger.trace("Json returned {}", baos);
return baos.toString();
}
- private Query maybeEmptyQuery(Builder theQuery) {
- Query query = theQuery.build();
- if (query.toString().isEmpty()) {
- query = new MatchAllDocsQuery();
+ /**
+ * Encodes a single SortField used in the search into the Json as to enable the
+ * ability to "search after" the last result of a previous search.
+ *
+ * @param gen JsonGenerator used to encode the results
+ * @param sortField SortField used to sort the hits
+ * @param lastDoc The final scored hit of the search
+ * @param lastDocument The full Document corresponding to the last hit of the
+ * search
+ * @throws LuceneException
+ */
+ private void encodeSearchAfterField(JsonGenerator gen, SortField sortField, ScoreDoc lastDoc, Document lastDocument)
+ throws LuceneException {
+ String fieldName = sortField.getField();
+ if (fieldName == null) {
+ // SCORE sorting will have a null fieldName
+ if (Float.isFinite(lastDoc.score)) {
+ gen.write(lastDoc.score);
+ }
+ return;
+ }
+ IndexableField indexableField = lastDocument.getField(fieldName);
+ if (indexableField == null) {
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + fieldName
+ + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored.");
+ }
+ Type type = (sortField instanceof SortedNumericSortField)
+ ? ((SortedNumericSortField) sortField).getNumericType()
+ : sortField.getType();
+ switch (type) {
+ case LONG:
+ if (indexableField.numericValue() != null) {
+ gen.write(indexableField.numericValue().longValue());
+ } else if (indexableField.stringValue() != null) {
+ gen.write(Long.valueOf(indexableField.stringValue()));
+ }
+ break;
+ case DOUBLE:
+ if (indexableField.numericValue() != null) {
+ gen.write(indexableField.numericValue().doubleValue());
+ } else if (indexableField.stringValue() != null) {
+ gen.write(Double.valueOf(indexableField.stringValue()));
+ }
+ break;
+ case STRING:
+ gen.write(indexableField.stringValue());
+ break;
+ default:
+ throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR,
+ "SortField.Type must be one of LONG, DOUBLE, STRING, but it was " + type);
+ }
+ }
+
+ /**
+ * Builds a Lucene Document from the parsed json.
+ *
+ * @param json Key value pairs of fields.
+ * @return Lucene Document.
+ */
+ private Document parseDocument(JsonObject json) {
+ Document document = new Document();
+ for (String key : json.keySet()) {
+ Field field = new Field(json, key, facetFields);
+ field.addToDocument(document);
+ convertUnits(json, document, key);
}
- logger.debug("Lucene query {}", query);
- return query;
+ return document;
}
- private Builder parseParameter(JsonValue p) {
- JsonObject parameter = (JsonObject) p;
- BooleanQuery.Builder paramQuery = new BooleanQuery.Builder();
- String pName = parameter.getString("name", null);
- if (pName != null) {
- paramQuery.add(new WildcardQuery(new Term("name", pName)), Occur.MUST);
+ /**
+ * If key is "type.units", all relevant numeric fields are converted to SI units
+ * and added to the document.
+ *
+ * @param json A JsonObject representing the Document to be built
+ * @param document The new Document being built
+ * @param key A key present in json
+ * @return Whether a conversion has been performed or not
+ */
+ private boolean convertUnits(JsonObject json, Document document, String key) {
+ // Whenever the units are set or changed, convert to SI
+ if (key.equals("type.units")) {
+ String unitString = json.getString("type.units");
+ convertValue(document, json, unitString, "numericValue");
+ convertValue(document, json, unitString, "rangeTop");
+ convertValue(document, json, unitString, "rangeBottom");
+ return true;
}
+ return false;
+ }
- String pUnits = parameter.getString("units", null);
- if (pUnits != null) {
- paramQuery.add(new WildcardQuery(new Term("units", pUnits)), Occur.MUST);
+ /**
+ * Attempts to convert numericFieldName from json into SI units from its
+ * recorded unitString, and then add it to the Lucene document.
+ *
+ * @param document Lucene Document to add the field to.
+ * @param json JsonObject containing the field/value pairs to be
+ * added.
+ * @param unitString Units of the value to be converted.
+ * @param numericFieldName Name (key) of the field to convert and add.
+ */
+ private void convertValue(Document document, JsonObject json, String unitString, String numericFieldName) {
+ IndexableField field = document.getField(numericFieldName);
+ double numericalValue;
+ if (field != null) {
+ numericalValue = NumericUtils.sortableLongToDouble(field.numericValue().longValue());
+ } else if (json.containsKey(numericFieldName)) {
+ numericalValue = json.getJsonNumber(numericFieldName).doubleValue();
+ } else {
+ // If we aren't dealing with the desired numeric field don't convert
+ return;
+ }
+ logger.trace("Attempting to convert {} {}", numericalValue, unitString);
+ Value value = icatUnits.convertValueToSiUnits(numericalValue, unitString);
+ if (value != null) {
+ document.add(new StringField("type.unitsSI", value.units, Store.YES));
+ document.add(new DoublePoint(numericFieldName + "SI", value.numericalValue));
+ document.add(new StoredField(numericFieldName + "SI", value.numericalValue));
+ long sortableLong = NumericUtils.doubleToSortableLong(value.numericalValue);
+ document.add(new NumericDocValuesField(numericFieldName + "SI", sortableLong));
}
- String pStringValue = parameter.getString("stringValue", null);
- String pLowerDateValue = parameter.getString("lowerDateValue", null);
- String pUpperDateValue = parameter.getString("upperDateValue", null);
- Double pLowerNumericValue = parameter.containsKey("lowerNumericValue")
- ? parameter.getJsonNumber("lowerNumericValue").doubleValue() : null;
- Double pUpperNumericValue = parameter.containsKey("upperNumericValue")
- ? parameter.getJsonNumber("upperNumericValue").doubleValue() : null;
- if (pStringValue != null) {
- paramQuery.add(new WildcardQuery(new Term("stringValue", pStringValue)), Occur.MUST);
- } else if (pLowerDateValue != null && pUpperDateValue != null) {
- paramQuery.add(new TermRangeQuery("dateTimeValue", new BytesRef(pLowerDateValue),
- new BytesRef(pUpperDateValue), true, true), Occur.MUST);
+ }
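The SI values are held in doc values as sortable longs, so reading one back requires the inverse transform, as in the field lookup at the top of this method. A minimal sketch of the round trip:

import org.apache.lucene.util.NumericUtils;

// Sketch: doubles survive the sortable-long encoding exactly.
double si = 1.602176634e-19;
long sortable = NumericUtils.doubleToSortableLong(si);
double recovered = NumericUtils.sortableLongToDouble(sortable);
assert recovered == si;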
- } else if (pLowerNumericValue != null && pUpperNumericValue != null) {
- paramQuery.add(NumericRangeQuery.newDoubleRange("numericValue", pLowerNumericValue, pUpperNumericValue,
- true, true), Occur.MUST);
+ /**
+ * Returns a new Lucene Document that has the same fields as were present in
+ * oldDocument, except in cases where json has an entry for that field. In this
+ * case, the json value is used instead.
+ *
+ * @param json Key value pairs of fields to overwrite fields already
+ * present in oldDocument.
+ * @param oldDocument Lucene Document to be updated.
+ * @return Lucene Document with updated fields.
+ */
+ private Document updateDocumentFields(JsonObject json, Document oldDocument) {
+ Document newDocument = new Document();
+ List<Field> fieldsSI = new ArrayList<>();
+ boolean hasNewUnits = false;
+ for (IndexableField field : oldDocument.getFields()) {
+ String fieldName = field.name();
+ if (json.containsKey(fieldName)) {
+ Field jsonField = new Field(json, fieldName, facetFields);
+ jsonField.addToDocument(newDocument);
+ hasNewUnits = hasNewUnits || convertUnits(json, newDocument, fieldName);
+ } else if (fieldName.endsWith("SI")) {
+ fieldsSI.add(new Field(field, facetFields));
+ } else {
+ Field oldField = new Field(field, facetFields);
+ oldField.addToDocument(newDocument);
+ }
+ }
+ if (!hasNewUnits) {
+ fieldsSI.forEach((field) -> {
+ field.addToDocument(newDocument);
+ });
}
- return paramQuery;
+ return newDocument;
}
+ /**
+ * Returns a new Lucene Document that has the same fields as were present in
+ * oldDocument, except those provided as an argument to prune.
+ *
+ * @param fields These fields will not
+ * be present in the returned Document.
+ * @param oldDocument Lucene Document to be pruned.
+ * @return Lucene Document with pruned fields.
+ */
+ private Document pruneDocument(Set<String> fields, Document oldDocument) {
+ Document newDocument = new Document();
+ for (IndexableField field : oldDocument.getFields()) {
+ if (!fields.contains(field.name())) {
+ Field fieldToAdd = new Field(field, facetFields);
+ fieldToAdd.addToDocument(newDocument);
+ }
+ }
+ return newDocument;
+ }
+
+ /**
+ * Unlocks the specified index after population, committing all pending
+ * documents and allowing normal modify operations again.
+ *
+ * @param entityName Name of the entity/index to unlock.
+ * @throws LuceneException If not locked, or if there's an IOException when
+ * committing documents.
+ */
@POST
@Path("unlock/{entityName}")
public void unlock(@PathParam("entityName") String entityName) throws LuceneException {
logger.debug("Requesting unlock of {} index", entityName);
- IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> createBucket(k));
+ IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k));
if (!bucket.locked.compareAndSet(true, false)) {
throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
"Lucene is not currently locked for " + entityName);
}
try {
- int cached = bucket.indexWriter.numRamDocs();
- bucket.indexWriter.commit();
- if (cached != 0) {
- logger.debug("Unlock has committed {} {} changes to Lucene - now have {} documents indexed", cached,
- entityName, bucket.indexWriter.numDocs());
- }
- bucket.searcherManager.maybeRefreshBlocking();
+ bucket.commit("Unlock", entityName);
} catch (IOException e) {
throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
}
}
+ /**
+ * Updates an existing Lucene document, provided that the target index is not
+ * locked for another operation.
+ *
+ * @param operationBody JsonObject containing the "_index" that the new "doc"
+ * should be created in.
+ * @throws LuceneException
+ * @throws NumberFormatException
+ * @throws IOException
+ */
+ private void update(JsonObject operationBody) throws LuceneException, NumberFormatException, IOException {
+ String entityName = operationBody.getString("_index");
+ if (DocumentMapping.relationships.containsKey(entityName)) {
+ updateByRelation(operationBody, false);
+ }
+ if (DocumentMapping.indexedEntities.contains(entityName)) {
+ long icatId = operationBody.getJsonNumber("_id").longValueExact();
+ JsonObject documentObject = operationBody.getJsonObject("doc");
+ Document document = parseDocument(documentObject);
+ IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k));
+ if (bucket.locked.get()) {
+ throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
+ "Lucene locked for " + entityName);
+ }
+ // Special case for filesizes
+ if (aggregateFiles && entityName.equals("Datafile")) {
+ JsonNumber jsonFileSize = documentObject.getJsonNumber("fileSize");
+ if (jsonFileSize != null) {
+ long sizeToSubtract = 0;
+ List<IndexSearcher> datafileSearchers = bucket.acquireSearchers();
+ for (IndexSearcher datafileSearcher : datafileSearchers) {
+ TopDocs topDocs = datafileSearcher.search(LongPoint.newExactQuery("id", icatId), 1);
+ if (topDocs.totalHits.value == 1) {
+ int docId = topDocs.scoreDocs[0].doc;
+ Document datasetDocument = datafileSearcher.doc(docId);
+ sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue();
+ long sizeToAdd = jsonFileSize.longValueExact();
+ if (sizeToAdd != sizeToSubtract) {
+ JsonNumber datasetId = documentObject.getJsonNumber("dataset.id");
+ JsonNumber investigationId = documentObject.getJsonNumber("investigation.id");
+ aggregateFileSize(sizeToAdd, sizeToSubtract, 0, datasetId, "dataset");
+ aggregateFileSize(sizeToAdd, sizeToSubtract, 0, investigationId, "investigation");
+ }
+ break;
+ }
+ }
+ }
+ }
+ logger.trace("update: {}", document);
+ bucket.updateDocument(icatId, facetsConfig.build(document));
+ }
+ }
+
+ /**
+ * Updates an existing Lucene document, provided that the target index is not
+ * locked for another operation. In this case, the entity being updated does not have
+ * its own index, but exists as fields on a parent. For example,
+ * InvestigationType on an Investigation.
+ *
+ * @param operationBody JsonObject containing the "_index" that the new "doc"
+ * should be created in.
+ * @param delete Whether to delete the related entity (or just update its
+ * values).
+ * @throws LuceneException
+ * @throws NumberFormatException
+ * @throws IOException
+ */
+ private void updateByRelation(JsonObject operationBody, boolean delete)
+ throws LuceneException, NumberFormatException, IOException {
+ for (DocumentMapping.ParentRelationship parentRelationship : DocumentMapping.relationships
+ .get(operationBody.getString("_index"))) {
+ long childId = operationBody.getJsonNumber("_id").longValueExact();
+ IndexBucket bucket = indexBuckets.computeIfAbsent(parentRelationship.parentName.toLowerCase(),
+ k -> new IndexBucket(k));
+ if (bucket.locked.get()) {
+ throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
+ "Lucene locked for " + parentRelationship.parentName);
+ }
+ IndexSearcher searcher = getSearcher(new HashMap<>(), parentRelationship.parentName);
+
+ int blockSize = 10000;
+ Query query = LongPoint.newExactQuery(parentRelationship.joiningField, childId);
+ Sort sort = new Sort(new SortField("id", Type.LONG));
+ ScoreDoc[] scoreDocs = searcher.search(query, blockSize, sort).scoreDocs;
+ while (scoreDocs.length != 0) {
+ for (ScoreDoc scoreDoc : scoreDocs) {
+ Document oldDocument = searcher.doc(scoreDoc.doc);
+ long parentId = oldDocument.getField("id").numericValue().longValue();
+ Document newDocument = delete ? pruneDocument(parentRelationship.fields, oldDocument)
+ : updateDocumentFields(operationBody.getJsonObject("doc"), oldDocument);
+ logger.trace("updateByRelation: {}", newDocument);
+ bucket.updateDocument(parentId, facetsConfig.build(newDocument));
+ }
+ scoreDocs = searcher.searchAfter(scoreDocs[scoreDocs.length - 1], query, blockSize, sort).scoreDocs;
+ }
+ }
+ }
+
}
diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java
new file mode 100644
index 0000000..2c51f76
--- /dev/null
+++ b/src/main/java/org/icatproject/lucene/SearchBucket.java
@@ -0,0 +1,916 @@
+package org.icatproject.lucene;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.StringReader;
+import java.net.HttpURLConnection;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.Map.Entry;
+
+import jakarta.json.Json;
+import jakarta.json.JsonArray;
+import jakarta.json.JsonNumber;
+import jakarta.json.JsonObject;
+import jakarta.json.JsonReader;
+import jakarta.json.JsonString;
+import jakarta.json.JsonValue;
+import jakarta.json.JsonValue.ValueType;
+import jakarta.servlet.http.HttpServletRequest;
+
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.facet.range.DoubleRange;
+import org.apache.lucene.facet.range.LongRange;
+import org.apache.lucene.facet.range.Range;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
+import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.TermInSetQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery.Builder;
+import org.apache.lucene.search.SortField.Type;
+import org.apache.lucene.search.join.JoinUtil;
+import org.apache.lucene.search.join.ScoreMode;
+import org.apache.lucene.util.BytesRef;
+import org.icatproject.lucene.exceptions.LuceneException;
+import org.icatproject.utils.IcatUnits.Value;
+
+/**
+ * Bucket for information relating to a single search.
+ */
+public class SearchBucket {
+
+ public enum SearchType {
+ DATAFILE, DATASET, INVESTIGATION, GENERIC
+ }
+
+ private Lucene lucene;
+ public Map<String, List<IndexSearcher>> searcherMap;
+ public Query query;
+ public Sort sort;
+ public FieldDoc searchAfter;
+ public boolean scored;
+ public Set<String> fields = new HashSet<>();
+ public Map<String, Set<String>> joinedFields = new HashMap<>();
+ public Map<String, FacetedDimension> dimensions = new HashMap<>();
+ private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm");
+
+ static {
+ TimeZone tz = TimeZone.getTimeZone("GMT");
+ df.setTimeZone(tz);
+ }
+
+ /**
+ * Creates an empty search bucket.
+ *
+ * @param lucene IcatLucene instance.
+ */
+ public SearchBucket(Lucene lucene) {
+ this.lucene = lucene;
+ searcherMap = new HashMap<>();
+ }
+
+ /**
+ * Creates a new search from the provided request and Url parameters.
+ *
+ * @param lucene IcatLucene instance.
+ * @param searchType The SearchType determines how the query is built for
+ * specific entities.
+ * @param request Incoming Http request containing the query as Json.
+ * @param sort Sort criteria as a Json encoded string.
+ * @param searchAfter The last FieldDoc of a previous search, encoded as Json.
+ * @throws LuceneException
+ * @throws IOException
+ * @throws QueryNodeException
+ */
+ public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest request, String sort,
+ String searchAfter) throws LuceneException, IOException, QueryNodeException {
+ this.lucene = lucene;
+ searcherMap = new HashMap<>();
+ parseSort(sort);
+ try (JsonReader r = Json.createReader(request.getInputStream())) {
+ JsonObject o = r.readObject();
+ parseFields(o);
+ parseDimensions(o);
+ JsonObject jsonQuery = o.getJsonObject("query");
+ switch (searchType) {
+ case GENERIC:
+ parseGenericQuery(jsonQuery);
+ return;
+ case DATAFILE:
+ parseDatafileQuery(searchAfter, jsonQuery);
+ return;
+ case DATASET:
+ parseDatasetQuery(searchAfter, jsonQuery);
+ return;
+ case INVESTIGATION:
+ parseInvestigationQuery(searchAfter, jsonQuery);
+ return;
+ }
+ } catch (QueryNodeParseException e) {
+ String message = "Search term could not be parsed due to syntax errors";
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message);
+ }
+ }
+
+ private void parseDatafileQuery(String searchAfter, JsonObject jsonQuery)
+ throws LuceneException, IOException, QueryNodeException {
+ BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder();
+ parseSearchAfter(searchAfter);
+ buildFilterQueries("datafile", jsonQuery, luceneQuery);
+
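+ // If a user is given, restrict datafiles to investigations that user is associated with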
+ String userName = jsonQuery.getString("user", null);
+ if (userName != null) {
+ buildUserNameQuery(userName, luceneQuery, "investigation.id");
+ }
+
+ String text = jsonQuery.getString("text", null);
+ if (text != null) {
+ luceneQuery.add(DocumentMapping.datafileParser.parse(text, null), Occur.MUST);
+ }
+
+ buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "date");
+
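+ // Each parameter condition becomes a join from DatafileParameter documents back to the datafile id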
+ if (jsonQuery.containsKey("parameters")) {
+ JsonArray parameters = jsonQuery.getJsonArray("parameters");
+ IndexSearcher datafileParameterSearcher = lucene.getSearcher(searcherMap, "DatafileParameter");
+ for (JsonValue p : parameters) {
+ BooleanQuery.Builder paramQuery = parseParameter(p);
+ Query toQuery = JoinUtil.createJoinQuery("datafile.id", false, "id", Long.class, paramQuery.build(),
+ datafileParameterSearcher, ScoreMode.None);
+ luceneQuery.add(toQuery, Occur.MUST);
+ }
+ }
+ query = maybeEmptyQuery(luceneQuery);
+ }
+
+ private void parseDatasetQuery(String searchAfter, JsonObject jsonQuery)
+ throws LuceneException, IOException, QueryNodeException {
+ BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder();
+ parseSearchAfter(searchAfter);
+ buildFilterQueries("dataset", jsonQuery, luceneQuery);
+
+ String userName = jsonQuery.getString("user", null);
+ if (userName != null) {
+ buildUserNameQuery(userName, luceneQuery, "investigation.id");
+ }
+
+ String text = jsonQuery.getString("text", null);
+ if (text != null) {
+ luceneQuery.add(DocumentMapping.datasetParser.parse(text, null), Occur.MUST);
+ }
+
+ buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate");
+
+ if (jsonQuery.containsKey("parameters")) {
+ JsonArray parameters = jsonQuery.getJsonArray("parameters");
+ IndexSearcher parameterSearcher = lucene.getSearcher(searcherMap, "DatasetParameter");
+ for (JsonValue p : parameters) {
+ BooleanQuery.Builder paramQuery = parseParameter(p);
+ Query toQuery = JoinUtil.createJoinQuery("dataset.id", false, "id", Long.class, paramQuery.build(),
+ parameterSearcher, ScoreMode.None);
+ luceneQuery.add(toQuery, Occur.MUST);
+ }
+ }
+ query = maybeEmptyQuery(luceneQuery);
+ }
+
+ private void parseInvestigationQuery(String searchAfter, JsonObject jsonQuery)
+ throws LuceneException, IOException, QueryNodeException {
+ BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder();
+ parseSearchAfter(searchAfter);
+ buildFilterQueries("investigation", jsonQuery, luceneQuery);
+
+ String userName = jsonQuery.getString("user", null);
+ if (userName != null) {
+ buildUserNameQuery(userName, luceneQuery, "id");
+ }
+
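+ // Free text should match either the Investigation itself or one of its Samples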
+ String text = jsonQuery.getString("text", null);
+ if (text != null) {
+ Builder textBuilder = new BooleanQuery.Builder();
+ textBuilder.add(DocumentMapping.investigationParser.parse(text, null), Occur.SHOULD);
+
+ IndexSearcher sampleSearcher = lucene.getSearcher(searcherMap, "Sample");
+ Query joinedSampleQuery = JoinUtil.createJoinQuery("sample.investigation.id", false, "id", Long.class,
+ DocumentMapping.sampleParser.parse(text, null), sampleSearcher, ScoreMode.Avg);
+ textBuilder.add(joinedSampleQuery, Occur.SHOULD);
+ luceneQuery.add(textBuilder.build(), Occur.MUST);
+ }
+
+ buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate");
+
+ if (jsonQuery.containsKey("parameters")) {
+ JsonArray parameters = jsonQuery.getJsonArray("parameters");
+ IndexSearcher parameterSearcher = lucene.getSearcher(searcherMap, "InvestigationParameter");
+ for (JsonValue p : parameters) {
+ BooleanQuery.Builder paramQuery = parseParameter(p);
+ Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", Long.class,
+ paramQuery.build(),
+ parameterSearcher, ScoreMode.None);
+ luceneQuery.add(toQuery, Occur.MUST);
+ }
+ }
+
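+ // userFullName is matched against InvestigationUser documents and joined back to the investigation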
+ String userFullName = jsonQuery.getString("userFullName", null);
+ if (userFullName != null) {
+ BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder();
+ userFullNameQuery.add(DocumentMapping.genericParser.parse(userFullName, "user.fullName"),
+ Occur.MUST);
+ IndexSearcher investigationUserSearcher = lucene.getSearcher(searcherMap, "InvestigationUser");
+ Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", Long.class,
+ userFullNameQuery.build(),
+ investigationUserSearcher, ScoreMode.None);
+ luceneQuery.add(toQuery, Occur.MUST);
+ }
+ query = maybeEmptyQuery(luceneQuery);
+ }
+
+ /**
+ * Extracts values from queryJson in order to add one or more range query terms
+ * using queryBuilder.
+ *
+ * Note that values in queryJson are expected to be precise only to the minute,
+ * and so to ensure that our range is inclusive, we add 59.999 seconds onto the
+ * upper value only.
+ *
+ * If either upper or lower keys do not yield values then a half open range is
+ * created. If both are absent, then nothing is added to the query.
+ *
+ * @param queryBuilder Builder for the Lucene query.
+ * @param queryJson JsonObject representing the query parameters.
+ * @param lowerKey Key in queryJson of the lower date value
+ * @param upperKey Key in queryJson of the upper date value
+ * @param fields Name of one or more fields to apply the range query to.
+ * @throws LuceneException
+ */
+ private void buildDateRanges(Builder queryBuilder, JsonObject queryJson, String lowerKey, String upperKey,
+ String... fields) throws LuceneException {
+ long lower = parseDate(queryJson, lowerKey, 0);
+ long upper = parseDate(queryJson, upperKey, 59999);
+ // Only build the query if at least one of the dates is defined
+ if (lower != Long.MIN_VALUE || upper != Long.MAX_VALUE) {
+ for (String field : fields) {
+ queryBuilder.add(LongPoint.newRangeQuery(field, lower, upper), Occur.MUST);
+ }
+ }
+ }
+
+ /**
+ * Builds Term queries (exact string matches without tokenizing), Range queries
+ * or Nested/Joined queries from the filter object in the query request.
+ *
+ * @param target Name of the entity being filtered/searched.
+ * @param requestedQuery Json object containing details of the query.
+ * @param queryBuilder Builder for the overall boolean query to be built.
+ * @throws LuceneException If a value in the filter object is not a STRING,
+ * OBJECT or an ARRAY of these.
+ * @throws IOException
+ */
+ private void buildFilterQueries(String target, JsonObject requestedQuery, Builder queryBuilder)
+ throws LuceneException, IOException {
+ if (requestedQuery.containsKey("filter")) {
+ JsonObject filterObject = requestedQuery.getJsonObject("filter");
+ for (String key : filterObject.keySet()) {
+ JsonValue value = filterObject.get(key);
+ ValueType valueType = value.getValueType();
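+ // Filter keys take the form "entity.field", or just "entity" when filtering on a nested entity as a whole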
+ int i = key.indexOf(".");
+ String filterTarget = i == -1 ? key : key.substring(0, i);
+ String fld = key.substring(i + 1);
+ Query dimensionQuery;
+ if (valueType.equals(ValueType.ARRAY)) {
+ Builder builder = new BooleanQuery.Builder();
+ // If the key was just a nested entity (no ".") then we should FILTER all of our
+ // queries on that entity.
+ Occur occur = i == -1 ? Occur.FILTER : Occur.SHOULD;
+ for (JsonValue arrayValue : filterObject.getJsonArray(key)) {
+ Query arrayQuery = parseFilter(target, fld, arrayValue);
+ builder.add(arrayQuery, occur);
+ }
+ dimensionQuery = builder.build();
+ } else {
+ dimensionQuery = parseFilter(target, fld, value);
+ }
+ // Nest the dimension query if needed
+ if (i != -1 && !target.equals(filterTarget)) {
+ // If the filter targets a different entity, wrap the query in a join back to
+ // the entity being searched before adding it as a FILTER clause
+ IndexSearcher nestedSearcher = lucene.getSearcher(searcherMap, filterTarget);
+ Query nestedQuery;
+ if (filterTarget.equals("sample") && target.equals("investigation")) {
+ nestedQuery = JoinUtil.createJoinQuery("sample.investigation.id", false, "id", Long.class,
+ dimensionQuery, nestedSearcher, ScoreMode.None);
+ } else if (filterTarget.toLowerCase().equals("investigationinstrument") && !target.equals("investigation")) {
+ nestedQuery = JoinUtil.createJoinQuery("investigation.id", false, "investigation.id", Long.class, dimensionQuery,
+ nestedSearcher, ScoreMode.None);
+ } else {
+ nestedQuery = JoinUtil.createJoinQuery(target + ".id", false, "id", Long.class, dimensionQuery,
+ nestedSearcher, ScoreMode.None);
+ }
+ queryBuilder.add(nestedQuery, Occur.FILTER);
+ } else {
+ // Otherwise, add the filter directly to the main query as a FILTER clause
+ queryBuilder.add(dimensionQuery, Occur.FILTER);
+ }
+ }
+ }
+ }
+
+ /**
+ * Parses a single filter field value pair into Lucene objects. Can handle
+ * simple strings, range objects or nested filters.
+ *
+ * @param target The target entity of the search (not necessarily the entity
+ * this filter applies to)
+ * @param fld The field to apply the query to
+ * @param value JsonValue (JsonString or JsonObject) to parse a Lucene Query
+ * from
+ * @return A Lucene Query object parsed from the provided value
+ * @throws IOException
+ * @throws LuceneException
+ */
+ private Query parseFilter(String target, String fld, JsonValue value) throws IOException, LuceneException {
+ ValueType valueType = value.getValueType();
+ switch (valueType) {
+ case STRING:
+ // Simplest case involving a single field/value pair
+ return new TermQuery(new Term(fld + ".keyword", ((JsonString) value).getString()));
+
+ case OBJECT:
+ JsonObject valueObject = (JsonObject) value;
+ if (valueObject.containsKey("filter")) {
+ // Parse a nested query
+ IndexSearcher nestedSearcher = lucene.getSearcher(searcherMap, fld);
+ List<JsonObject> nestedFilters = valueObject.getJsonArray("filter").getValuesAs(JsonObject.class);
+ Builder nestedBoolBuilder = new BooleanQuery.Builder();
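+ // Each nested filter entry provides either an exact string "value", an "exact" number, or a from/to range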
+ nestedFilters.forEach(nestedFilter -> {
+ String nestedField = nestedFilter.getString("field");
+ if (nestedFilter.containsKey("value")) {
+ Term term = new Term(nestedField + ".keyword", nestedFilter.getString("value"));
+ TermQuery query = new TermQuery(term);
+ nestedBoolBuilder.add(query, Occur.FILTER);
+ } else if (nestedFilter.containsKey("exact")) {
+ buildNestedExactQuery(nestedField, nestedFilter, nestedBoolBuilder);
+ } else {
+ buildNestedRangeQuery(nestedField, nestedFilter, nestedBoolBuilder);
+ }
+ });
+ if (fld.contains("sample") && !target.equals("investigation")) {
+ // Datasets and Datafiles join by sample.id on both fields
+ return JoinUtil.createJoinQuery("sample.id", false, "sample.id", Long.class,
+ nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None);
+ } else if (fld.equals("sampleparameter") && target.equals("investigation")) {
+ Query sampleQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", Long.class,
+ nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None);
+ return JoinUtil.createJoinQuery("sample.investigation.id", false, "id", Long.class, sampleQuery,
+ lucene.getSearcher(searcherMap, "sample"), ScoreMode.None);
+ } else {
+ return JoinUtil.createJoinQuery(target + ".id", false, "id", Long.class,
+ nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None);
+ }
+ } else {
+ // Single range of values for a field
+ JsonNumber from = valueObject.getJsonNumber("from");
+ JsonNumber to = valueObject.getJsonNumber("to");
+ if (DocumentMapping.longFields.contains(fld)) {
+ return LongPoint.newRangeQuery(fld, from.longValueExact(), to.longValueExact());
+ } else {
+ return DoublePoint.newRangeQuery(fld, from.doubleValue(), to.doubleValue());
+ }
+ }
+
+ default:
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "filter object values should be STRING or OBJECT, but were " + valueType);
+ }
+ }
+
+ /**
+ * Builds an exact numeric query, intended for use with numeric or date/time
+ * parameters.
+ *
+ * @param fld Name of the field to apply the range to.
+ * @param valueObject JsonObject containing "exact", and optionally "units"
+ * as keys for an exact value.
+ * @param builder BooleanQuery.Builder for the nested query
+ */
+ private void buildNestedExactQuery(String fld, JsonObject valueObject, BooleanQuery.Builder builder) {
+ if (DocumentMapping.longFields.contains(fld)) {
+ long exact = valueObject.getJsonNumber("exact").longValueExact();
+ builder.add(LongPoint.newExactQuery(fld, exact), Occur.FILTER);
+ } else {
+ Builder rangeBuilder = new BooleanQuery.Builder();
+ Builder exactOrRangeBuilder = new BooleanQuery.Builder();
+ double exact = valueObject.getJsonNumber("exact").doubleValue();
+ String units = valueObject.getString("units", null);
+ if (units != null) {
+ Value exactValue = lucene.icatUnits.convertValueToSiUnits(exact, units);
+ if (exactValue != null) {
+ // If we were able to parse the units, apply query to the SI value
+ Query topQuery = DoublePoint.newRangeQuery("rangeTopSI", exactValue.numericalValue,
+ Double.POSITIVE_INFINITY);
+ Query bottomQuery = DoublePoint.newRangeQuery("rangeBottomSI", Double.NEGATIVE_INFINITY,
+ exactValue.numericalValue);
+ Query exactQuery = DoublePoint.newExactQuery(fld + "SI", exactValue.numericalValue);
+ rangeBuilder.add(topQuery, Occur.FILTER);
+ rangeBuilder.add(bottomQuery, Occur.FILTER);
+ exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD);
+ exactOrRangeBuilder.add(exactQuery, Occur.SHOULD);
+ builder.add(exactOrRangeBuilder.build(), Occur.FILTER);
+ } else {
+ // If units could not be parsed, make them part of the query on the raw data
+ rangeBuilder.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY),
+ Occur.FILTER);
+ rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact),
+ Occur.FILTER);
+ exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD);
+ exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld, exact), Occur.SHOULD);
+ builder.add(exactOrRangeBuilder.build(), Occur.FILTER);
+ builder.add(new TermQuery(new Term("type.units", units)), Occur.FILTER);
+ }
+ } else {
+ // If units were not provided, just apply to the raw data
+ rangeBuilder.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY), Occur.FILTER);
+ rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact),
+ Occur.FILTER);
+ exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD);
+ exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld, exact), Occur.SHOULD);
+ builder.add(exactOrRangeBuilder.build(), Occur.FILTER);
+ }
+ }
+ }
+
+ /**
+ * Builds a range query, intended for use with numeric or date/time parameters.
+ *
+ * @param fld Name of the field to apply the range to.
+ * @param valueObject JsonObject containing "from", "to" and optionally "units"
+ * as keys for a range of values.
+ * @param builder BooleanQuery.Builder for the nested query
+ */
+ private void buildNestedRangeQuery(String fld, JsonObject valueObject, BooleanQuery.Builder builder) {
+ if (DocumentMapping.longFields.contains(fld)) {
+ long from = Long.MIN_VALUE;
+ long to = Long.MAX_VALUE;
+ try {
+ from = valueObject.getJsonNumber("from").longValueExact();
+ } catch (ArithmeticException e) {
+ // pass
+ }
+ try {
+ to = valueObject.getJsonNumber("to").longValueExact();
+ } catch (ArithmeticException e) {
+ // pass
+ }
+ builder.add(LongPoint.newRangeQuery(fld, from, to), Occur.FILTER);
+ } else {
+ double from = valueObject.getJsonNumber("from").doubleValue();
+ double to = valueObject.getJsonNumber("to").doubleValue();
+ String units = valueObject.getString("units", null);
+ if (units != null) {
+ Value fromValue = lucene.icatUnits.convertValueToSiUnits(from, units);
+ Value toValue = lucene.icatUnits.convertValueToSiUnits(to, units);
+ if (fromValue != null && toValue != null) {
+ // If we were able to parse the units, apply query to the SI value
+ Query rangeQuery = DoublePoint.newRangeQuery(fld + "SI", fromValue.numericalValue,
+ toValue.numericalValue);
+ builder.add(rangeQuery, Occur.FILTER);
+ } else {
+ // If units could not be parsed, make them part of the query on the raw data
+ builder.add(DoublePoint.newRangeQuery(fld, from, to), Occur.FILTER);
+ builder.add(new TermQuery(new Term("type.units", units)), Occur.FILTER);
+ }
+ } else {
+ // If units were not provided, just apply to the raw data
+ builder.add(DoublePoint.newRangeQuery(fld, from, to), Occur.FILTER);
+ }
+ }
+ }
+
+ /**
+ * Builds a query against InvestigationUser and InstrumentScientist entities
+ * using the provided userName.
+ *
+ * @param userName The value of the user.name field to query for.
+ * @param luceneQuery BooleanQuery.Builder in use for main entity query.
+ * @param toField The field on the main entity to join to, practically
+ * either "id" or "investigation.id".
+ * @throws IOException
+ * @throws LuceneException
+ */
+ private void buildUserNameQuery(String userName, BooleanQuery.Builder luceneQuery, String toField)
+ throws IOException, LuceneException {
+ TermQuery fromQuery = new TermQuery(new Term("user.name", userName));
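+ // Match if the user is an InvestigationUser on the investigation, or an InstrumentScientist
+ // on an instrument used by it (joined via InvestigationInstrument)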
+ Query investigationUserQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, Long.class,
+ fromQuery, lucene.getSearcher(searcherMap, "InvestigationUser"), ScoreMode.None);
+ Query instrumentScientistQuery = JoinUtil.createJoinQuery("instrument.id", false, "instrument.id", Long.class,
+ fromQuery, lucene.getSearcher(searcherMap, "InstrumentScientist"), ScoreMode.None);
+ Query investigationInstrumentQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, Long.class,
+ instrumentScientistQuery, lucene.getSearcher(searcherMap, "InvestigationInstrument"), ScoreMode.None);
+ Builder userNameQueryBuilder = new BooleanQuery.Builder();
+ userNameQueryBuilder.add(investigationUserQuery, Occur.SHOULD).add(investigationInstrumentQuery, Occur.SHOULD);
+ luceneQuery.add(userNameQueryBuilder.build(), Occur.MUST);
+ }
+
+ /**
+ * Converts String into number of ms since epoch.
+ *
+ * @param value String representing a Date in the format "yyyyMMddHHmm".
+ * @return Number of ms since epoch.
+ * @throws java.text.ParseException
+ */
+ protected static long decodeTime(String value) throws java.text.ParseException {
+ synchronized (df) {
+ return df.parse(value).getTime();
+ }
+ }
+
+ /**
+ * Either builds the query from the provided builder, or creates a
+ * MatchAllDocsQuery to use if the Builder was empty.
+ *
+ * @param luceneQuery BooleanQuery.Builder
+ * @return Lucene Query
+ */
+ private Query maybeEmptyQuery(Builder luceneQuery) {
+ Query query = luceneQuery.build();
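+ // A BooleanQuery with no clauses serialises to an empty String; replace it with a match-all query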
+ if (query.toString().isEmpty()) {
+ query = new MatchAllDocsQuery();
+ }
+ return query;
+ }
+
+ /**
+ * Parses a date/time value from jsonObject. Can account for either a Long
+ * value, or a String value encoded in the format yyyyMMddHHmm.
+ *
+ * @param jsonObject JsonObject containing the date to be parsed.
+ * @param key Key of the date/time value in jsonObject.
+ * @param offset In the case of STRING ValueType, add offset ms before
+ * returning. This accounts for the fact the String format
+ * used is only precise to minutes and not seconds.
+ * @return Long.MIN_VALUE or Long.MAX_VALUE (depending on offset) if jsonObject
+ * does not contain the key, number of ms since epoch otherwise.
+ * @throws LuceneException If the ValueType is not NUMBER or STRING, or if a
+ * STRING value cannot be parsed.
+ */
+ private long parseDate(JsonObject jsonObject, String key, int offset) throws LuceneException {
+ if (jsonObject.containsKey(key)) {
+ ValueType valueType = jsonObject.get(key).getValueType();
+ switch (valueType) {
+ case STRING:
+ String dateString = jsonObject.getString(key);
+ try {
+ return decodeTime(dateString) + offset;
+ } catch (Exception e) {
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "Could not parse date " + dateString + " using expected format yyyyMMddHHmm");
+ }
+ case NUMBER:
+ return jsonObject.getJsonNumber(key).longValueExact();
+ default:
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "Dates should be represented by a NUMBER or STRING JsonValue, but got " + valueType);
+ }
+ }
+ // If the key wasn't present, use either MIN_VALUE or MAX_VALUE based on whether
+ // we need to offset the date. This is useful for half open ranges.
+ if (offset == 0) {
+ return Long.MIN_VALUE;
+ } else {
+ return Long.MAX_VALUE;
+ }
+ }
+
+ /**
+ * Parses dimensions to apply faceting to from the incoming Json. If ranges are
+ * specified, these are also parsed.
+ *
+ * @param jsonObject Json from incoming search request.
+ * @throws LuceneException
+ */
+ private void parseDimensions(JsonObject jsonObject) throws LuceneException {
+ if (jsonObject.containsKey("dimensions")) {
+ List<JsonObject> dimensionObjects = jsonObject.getJsonArray("dimensions").getValuesAs(JsonObject.class);
+ for (JsonObject dimensionObject : dimensionObjects) {
+ if (!dimensionObject.containsKey("dimension")) {
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "'dimension' not specified for facet request " + dimensionObject);
+ }
+ String dimension = dimensionObject.getString("dimension");
+ FacetedDimension facetDimensionRequest = new FacetedDimension(dimension);
+ if (dimensionObject.containsKey("ranges")) {
+ List<Range> ranges = facetDimensionRequest.getRanges();
+ List<JsonObject> jsonRanges = dimensionObject.getJsonArray("ranges").getValuesAs(JsonObject.class);
+ if (DocumentMapping.longFields.contains(dimension)) {
+ for (JsonObject range : jsonRanges) {
+ long lower = Long.MIN_VALUE;
+ long upper = Long.MAX_VALUE;
+ if (range.containsKey("from")) {
+ lower = range.getJsonNumber("from").longValueExact();
+ }
+ if (range.containsKey("to")) {
+ upper = range.getJsonNumber("to").longValueExact();
+ }
+ String label = lower + "-" + upper;
+ if (range.containsKey("key")) {
+ label = range.getString("key");
+ }
+ ranges.add(new LongRange(label, lower, true, upper, false));
+ }
+ } else if (DocumentMapping.doubleFields.contains(dimension)) {
+ for (JsonObject range : jsonRanges) {
+ double lower = -Double.MAX_VALUE; // lowest finite double, so an absent "from" still includes negative values
+ double upper = Double.MAX_VALUE;
+ if (range.containsKey("from")) {
+ lower = range.getJsonNumber("from").doubleValue();
+ }
+ if (range.containsKey("to")) {
+ upper = range.getJsonNumber("to").doubleValue();
+ }
+ String label = lower + "-" + upper;
+ if (range.containsKey("key")) {
+ label = range.getString("key");
+ }
+ ranges.add(new DoubleRange(label, lower, true, upper, false));
+ }
+ } else {
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "'ranges' specified for dimension " + dimension
+ + " but this is not a supported numeric field");
+ }
+ }
+ dimensions.put(dimension, facetDimensionRequest);
+ }
+ }
+ }
+
+ /**
+ * Parses the fields to return with the search results from Json.
+ *
+ * @param jsonObject The Json from the search request.
+ * @throws LuceneException If the parsing fails.
+ */
+ public void parseFields(JsonObject jsonObject) throws LuceneException {
+ if (jsonObject.containsKey("fields")) {
+ List<JsonString> fieldStrings = jsonObject.getJsonArray("fields").getValuesAs(JsonString.class);
+ // logger.trace("Parsing fields from {}", fieldStrings);
+ for (JsonString jsonString : fieldStrings) {
+ String[] splitString = jsonString.getString().split(" ");
+ if (splitString.length == 1) {
+ // Fields without a space apply directly to the target entity
+ fields.add(splitString[0]);
+ } else if (splitString.length == 2) {
+ // Otherwise, the first element is the target of a join, with the second being a
+ // field on that joined entity.
+ if (joinedFields.containsKey(splitString[0])) {
+ joinedFields.get(splitString[0]).add(splitString[1]);
+ } else {
+ joinedFields.putIfAbsent(splitString[0],
+ new HashSet<>(Arrays.asList(splitString[1])));
+ }
+ } else {
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "Could not parse field: " + jsonString.getString());
+ }
+ }
+ }
+ }
+
+ /**
+ * Parses a query and associated information from an incoming request without
+ * any logic specific to a single index or entity. As such it may not be as
+ * powerful, but is sufficient for simple queries (like those for faceting).
+ *
+ * @param jsonQuery Incoming query request encoded as Json.
+ * @throws LuceneException If the types of the JsonValues in the query do not
+ * match those supported by icat.lucene
+ */
+ private void parseGenericQuery(JsonObject jsonQuery) throws LuceneException {
+ BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder();
+ for (Entry<String, JsonValue> entry : jsonQuery.entrySet()) {
+ String field = entry.getKey();
+ ValueType valueType = entry.getValue().getValueType();
+ switch (valueType) {
+ case STRING:
+ JsonString stringValue = (JsonString) entry.getValue();
+ String fld = lucene.facetFields.contains(field) ? field + ".keyword" : field;
+ luceneQuery.add(new TermQuery(new Term(fld, stringValue.getString())), Occur.MUST);
+ break;
+ case NUMBER:
+ JsonNumber numberValue = (JsonNumber) entry.getValue();
+ if (DocumentMapping.longFields.contains(field)) {
+ luceneQuery.add(LongPoint.newExactQuery(field, numberValue.longValueExact()), Occur.FILTER);
+ } else if (DocumentMapping.doubleFields.contains(field)) {
+ luceneQuery.add(DoublePoint.newExactQuery(field, numberValue.doubleValue()), Occur.FILTER);
+ } else {
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "Value had type NUMBER, but field " + field
+ + " is not a known longField or doubleField");
+ }
+ break;
+ case ARRAY:
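+ // Arrays may contain numbers (ids) or strings; collect each kind separately for the appropriate set query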
+ ArrayList<Long> longList = new ArrayList<>();
+ ArrayList<BytesRef> bytesRefList = new ArrayList<>();
+ JsonArray arrayValue = (JsonArray) entry.getValue();
+ for (JsonValue value : arrayValue) {
+ ValueType arrayValueType = value.getValueType();
+ switch (arrayValueType) {
+ case NUMBER:
+ longList.add(((JsonNumber) value).longValueExact());
+ break;
+ default:
+ bytesRefList.add(new BytesRef(((JsonString) value).getString()));
+ break;
+ }
+ }
+
+ if (longList.size() == 0 && bytesRefList.size() == 0) {
+ query = new MatchNoDocsQuery("Tried filtering " + field + " with an empty array");
+ return;
+ }
+ if (longList.size() != 0) {
+ luceneQuery.add(LongPoint.newSetQuery(field, longList), Occur.MUST);
+ }
+ if (bytesRefList.size() != 0) {
+ luceneQuery.add(new TermInSetQuery(field, bytesRefList), Occur.MUST);
+ }
+ break;
+ default:
+ throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+ "Query values should be ARRAY, STRING or NUMBER, but had value of type " + valueType);
+ }
+ }
+ query = maybeEmptyQuery(luceneQuery);
+ }
+
+ /**
+ * Parses query applying to a single parameter from incoming Json.
+ *
+ * @param p JsonValue (JsonObject) representing a query against a single
+ * parameter.
+ * @return BooleanQuery.Builder for a single parameter.
+ * @throws LuceneException
+ */
+ private Builder parseParameter(JsonValue p) throws LuceneException {
+ JsonObject parameter = (JsonObject) p;
+ BooleanQuery.Builder paramQuery = new BooleanQuery.Builder();
+ String pName = parameter.getString("name", null);
+ if (pName != null) {
+ paramQuery.add(new WildcardQuery(new Term("type.name.keyword", pName)), Occur.MUST);
+ }
+
+ String pUnits = parameter.getString("units", null);
+ if (pUnits != null) {
+ paramQuery.add(new WildcardQuery(new Term("type.units", pUnits)), Occur.MUST);
+ }
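+ // A parameter's value may be given as a string, a date range, or a numeric range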
+ if (parameter.containsKey("stringValue")) {
+ String pStringValue = parameter.getString("stringValue", null);
+ paramQuery.add(new WildcardQuery(new Term("stringValue", pStringValue)), Occur.MUST);
+ } else if (parameter.containsKey("lowerDateValue") && parameter.containsKey("upperDateValue")) {
+ buildDateRanges(paramQuery, parameter, "lowerDateValue", "upperDateValue", "dateTimeValue");
+ } else if (parameter.containsKey("lowerNumericValue") && parameter.containsKey("upperNumericValue")) {
+ double pLowerNumericValue = parameter.getJsonNumber("lowerNumericValue").doubleValue();
+ double pUpperNumericValue = parameter.getJsonNumber("upperNumericValue").doubleValue();
+ paramQuery.add(DoublePoint.newRangeQuery("numericValue", pLowerNumericValue, pUpperNumericValue),
+ Occur.MUST);
+ }
+ return paramQuery;
+ }
+
+ /**
+ * Parses a Lucene FieldDoc to be "searched after" from a String representation
+ * of a JSON object, and stores it on this bucket. Does nothing if searchAfter
+ * is null or an empty String.
+ *
+ * @param searchAfter String representation of a JSON object containing the
+ * Lucene "doc" id, "shardIndex", "score" and the values of
+ * the sorted fields from the last result of a previous search.
+ * @throws LuceneException If an entry in the fields array is not a STRING or
+ * NUMBER
+ */
+ private void parseSearchAfter(String searchAfter) throws LuceneException {
+ if (searchAfter == null || searchAfter.equals("")) {
+ return;
+ }
+ SortField[] sortFields = sort.getSort();
+ JsonReader reader = Json.createReader(new StringReader(searchAfter));
+ JsonObject object = reader.readObject();
+ // shardIndex and Lucene doc Id are always needed to determine tie breaks, even
+ // if the field sort resulted in no ties in the first place
+ int shardIndex = object.getInt("shardIndex");
+ int doc = object.getInt("doc");
+ float score = Float.NaN;
+ List