Update schema and tests

aodn · Sep 3, 2024 · 443acfd · 443acfd
1 parent eb89193
commit 443acfd
Show file tree

Hide file tree

Showing 32 changed files with 855 additions and 1,066 deletions.
diff --git a/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerExtController.java b/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerExtController.java
@@ -88,6 +88,11 @@ public ResponseEntity<List<JsonNode>> getOrganisationVocabsFromArdc() {
     @GetMapping(path="/vocabs/populate")
     @Operation(security = { @SecurityRequirement(name = "X-API-Key") }, description = "Populate data to the vocabs index")
     public ResponseEntity<String> populateDataToVocabsIndex() throws IOException {
+        // clear existing caches
+        vocabService.clearParameterVocabCache();
+        vocabService.clearPlatformVocabCache();
+        vocabService.clearOrganisationVocabCache();
+        // populate new data
         vocabService.populateVocabsData();
         return ResponseEntity.status(HttpStatus.NO_CONTENT).body("Populated data to the vocabs index");
     }

diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java
@@ -4,7 +4,7 @@
 import au.org.aodn.esindexer.exception.*;
 import au.org.aodn.esindexer.utils.JaxbUtils;
 import au.org.aodn.metadata.iso19115_3_2018.MDMetadataType;
-import au.org.aodn.stac.model.RecordSuggest;
+import au.org.aodn.stac.model.SearchSuggestionsModel;
 import au.org.aodn.stac.model.StacCollectionModel;
 import co.elastic.clients.elasticsearch.ElasticsearchClient;
 import co.elastic.clients.elasticsearch._types.ElasticsearchException;
@@ -44,7 +44,6 @@
 
 @Slf4j
 @Service
-@Slf4j
 public class IndexerServiceImpl implements IndexerService {
 
     protected String indexName;
@@ -172,36 +171,24 @@ protected StacCollectionModel getMappedMetadataValues(String metadataValues) thr
 
         // parameter vocabs
         List<String> processedParameterVocabs = vocabService.extractVocabLabelsFromThemes(stacCollectionModel.getThemes(), AppConstants.AODN_DISCOVERY_PARAMETER_VOCABS);
-        if (!processedParameterVocabs.isEmpty()) {
-            stacCollectionModel.getSummaries().setParameterVocabs(processedParameterVocabs);
-        }
 
-        /*
-        NOTE: The following implementation for platform and organization vocabularies is just a placeholder, not the final version.
-        It follows the same logic as what we intended for the parameter vocabulary, where we extract the list of second-level vocabularies that a record belongs to from its bottom-level vocabularies.
+        // NOTE: The following implementation for platform and organization vocabularies is just a placeholder, not the final version.
+        // It follows the same logic as what we intended for the parameter vocabulary, where we extract the list of second-level vocabularies that a record belongs to from its bottom-level vocabularies.
         // TODO: Adjust if necessary, or remove the above comments after making a final decision.
-        --------------BEGIN--------------
-        */
         // platform vocabs
         List<String> processedPlatformVocabs = vocabService.extractVocabLabelsFromThemes(stacCollectionModel.getThemes(), AppConstants.AODN_PLATFORM_VOCABS);
-        if (!processedPlatformVocabs.isEmpty()) {
-            stacCollectionModel.getSummaries().setPlatformVocabs(processedPlatformVocabs);
-        }
         // organisation vocabs
         List<String> processedOrganisationVocabs = vocabService.extractVocabLabelsFromThemes(stacCollectionModel.getThemes(), AppConstants.AODN_ORGANISATION_VOCABS);
-        if (!processedOrganisationVocabs.isEmpty()) {
-            stacCollectionModel.getSummaries().setOrganisationVocabs(processedOrganisationVocabs);
-        }
-        /*
-        --------------END--------------
-         */
 
         // categories suggest using a different index
         // extendable to other aspects of the record data (e.g. title, description) that are unique to the record and currently use the "text" type
-        RecordSuggest recordSuggest = RecordSuggest.builder()
+        SearchSuggestionsModel searchSuggestionsModel = SearchSuggestionsModel.builder()
                 .abstractPhrases(this.extractTokensFromDescription(stacCollectionModel.getDescription()))
+                .parameterVocabs(!processedParameterVocabs.isEmpty() ? processedParameterVocabs : null)
+                .platformVocabs(!processedPlatformVocabs.isEmpty() ? processedPlatformVocabs : null)
+                .organisationVocabs(!processedOrganisationVocabs.isEmpty() ? processedOrganisationVocabs : null)
                 .build();
-        stacCollectionModel.setRecordSuggest(recordSuggest);
+        stacCollectionModel.setSearchSuggestionsModel(searchSuggestionsModel);
 
         return stacCollectionModel;
     }

diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/VocabServiceImpl.java b/indexer/src/main/java/au/org/aodn/esindexer/service/VocabServiceImpl.java
@@ -364,13 +364,12 @@ public List<String> extractVocabLabelsFromThemes(List<ThemesModel> themes, Strin
         };
 
         if (!vocabs.isEmpty() && !themes.isEmpty()) {
-            themes.stream().filter(Objects::nonNull).forEach(theme -> {
-                vocabs.stream().filter(Objects::nonNull).forEach(topLevelVocab -> {
-                    if (topLevelVocab.has("narrower") && !topLevelVocab.get("narrower").isEmpty()) {
-                        for (JsonNode secondLevelVocab : topLevelVocab.get("narrower")) {
-                            if (secondLevelVocab != null && secondLevelVocab.has("label") && secondLevelVocab.has("about")) {
-                                String secondLevelVocabLabel = secondLevelVocab.get("label").asText().toLowerCase();
-
+            vocabs.stream().filter(Objects::nonNull).forEach(topLevelVocab -> {
+                if (topLevelVocab.has("narrower") && !topLevelVocab.get("narrower").isEmpty()) {
+                    for (JsonNode secondLevelVocab : topLevelVocab.get("narrower")) {
+                        if (secondLevelVocab != null && secondLevelVocab.has("label") && secondLevelVocab.has("about")) {
+                            String secondLevelVocabLabel = secondLevelVocab.get("label").asText().toLowerCase();
+                            themes.stream().filter(Objects::nonNull).forEach(theme -> {
                                 ConceptModel secondLevelVocabAsConcept = ConceptModel.builder()
                                         .id(secondLevelVocab.get("label").asText())
                                         .url(secondLevelVocab.get("about").asText())
@@ -379,7 +378,6 @@ public List<String> extractVocabLabelsFromThemes(List<ThemesModel> themes, Strin
                                 // if the record's theme is already a second-level vocab, no further check is needed
                                 if (themeMatchConcept(theme, secondLevelVocabAsConcept) && !results.contains(secondLevelVocabLabel)) {
                                     results.add(secondLevelVocabLabel);
-                                    break;
                                 }
 
                                 // if the record's theme is leaf-node (bottom-level vocab)
@@ -401,10 +399,10 @@ public List<String> extractVocabLabelsFromThemes(List<ThemesModel> themes, Strin
                                         }
                                     }
                                 }
-                            }
+                            });
                         }
                     }
-                });
+                }
             });
         }
         return results;

diff --git a/indexer/src/main/resources/config_files/portal_records_index_schema.json b/indexer/src/main/resources/config_files/portal_records_index_schema.json
@@ -79,13 +79,15 @@
           }
         }
       },
-      "record_suggest": {
+      "search_suggestions": {
         "type": "nested",
         "properties": {
-          "abstract_phrases": { "type": "search_as_you_type", "analyzer": "custom_analyser" }
+          "abstract_phrases": { "type": "search_as_you_type", "analyzer": "custom_analyser" },
+          "parameter_vocabs": { "type": "search_as_you_type", "analyzer": "custom_analyser" },
+          "platform_vocabs": { "type": "search_as_you_type", "analyzer": "custom_analyser" },
+          "organisation_vocabs": { "type": "search_as_you_type", "analyzer": "custom_analyser" }
         }
       },
-      "parameter_vocabs" : { "type":  "keyword" },
       "keywords": {
         "type": "nested",
         "properties": {

diff --git a/indexer/src/main/resources/config_files/vocabs_index_schema.json b/indexer/src/main/resources/config_files/vocabs_index_schema.json
@@ -1,49 +1,87 @@
 {
-  "settings":{
-    "analysis":{
-      "analyzer":{
-        "custom_analyser":{
-          "type":"custom",
-          "tokenizer":"standard",
-          "filter":[
-            "lowercase",
-            "english_stop"
-          ]
-        }
-      },
-      "filter":{
-        "english_stop":{
-          "type":"stop",
-          "stopwords":"_english_"
-        }
-      }
-    }
-  },
   "mappings": {
     "dynamic": true,
     "properties": {
       "parameter_vocab": {
         "properties": {
           "label": {
-            "type": "search_as_you_type",
-            "analyzer": "custom_analyser"
+            "type": "text"
+          },
+          "definition": {
+            "type": "text"
+          },
+          "about": {
+            "type": "keyword"
+          },
+          "narrower": {
+            "type": "nested",
+            "properties": {
+              "label": {
+                "type": "text"
+              },
+              "about": {
+                "type": "keyword"
+              },
+              "narrower": {
+                "type": "nested",
+                "properties": {
+                  "label": {
+                    "type": "text"
+                  },
+                  "about": {
+                    "type": "keyword"
+                  }
+                }
+              }
+            }
+          }
+        }
+      },
+      "platform_vocab": {
+        "properties": {
+          "label": {
+            "type": "text"
           },
           "definition": {
             "type": "text"
           },
           "about": {
             "type": "keyword"
           },
-          "broader": {
+          "narrower": {
             "type": "nested",
             "properties": {
               "label": {
                 "type": "text"
               },
               "about": {
                 "type": "keyword"
+              },
+              "narrower": {
+                "type": "nested",
+                "properties": {
+                  "label": {
+                    "type": "text"
+                  },
+                  "about": {
+                    "type": "keyword"
+                  }
+                }
               }
             }
+          }
+        }
+      },
+      "organisation_vocab": {
+        "properties": {
+          "label": {
+            "type": "text"
+          },
+          "definition": {
+            "type": "text"
+          },
+          "about": {
+            "type": "keyword"
           },
           "narrower": {
             "type": "nested",

diff --git a/indexer/src/test/java/au/org/aodn/esindexer/service/GeoNetworkServiceTests.java b/indexer/src/test/java/au/org/aodn/esindexer/service/GeoNetworkServiceTests.java
@@ -82,9 +82,6 @@ public void verifyInsertMetadataWorks() throws IOException {
 
             logger.debug("Get count in verifyInsertMetadataWorks");
 
-            Assertions.assertFalse(geoNetworkService.isMetadataRecordsCountLessThan(1), "Compare false");
-            Assertions.assertTrue(geoNetworkService.isMetadataRecordsCountLessThan(2), "Compare true");
-
             Iterable<String> i = geoNetworkService.getAllMetadataRecords(null);
 
             for (String x : i) {

diff --git a/indexer/src/test/java/au/org/aodn/esindexer/service/IndexerServiceTests.java b/indexer/src/test/java/au/org/aodn/esindexer/service/IndexerServiceTests.java
@@ -7,7 +7,10 @@
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.json.JSONException;
 import org.junit.jupiter.api.*;
+import org.skyscreamer.jsonassert.JSONAssert;
+import org.skyscreamer.jsonassert.JSONCompareMode;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
 import org.springframework.beans.factory.annotation.Value;
@@ -19,8 +22,6 @@
 import java.util.List;
 import java.util.Objects;
 
-import static au.org.aodn.esindexer.utils.CommonUtils.persevere;
-
 @SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
 @ActiveProfiles("test")
 @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@@ -134,9 +135,10 @@ public void verifyGetDocumentByUUID() throws IOException {
             String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
             String actual = indexerObjectMapper.readTree(test).toPrettyString();
 
-            Assertions.assertEquals(expected, actual, "Stac not equals for sample 4. Uuid:" + uuid);
-        }
-        finally {
+            JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
+        } catch (JSONException e) {
+            throw new RuntimeException(e);
+        } finally {
             deleteRecord(uuid);
         }
     }
@@ -164,7 +166,9 @@ public void verifyAssociatedRecordIndexer() throws IOException{
             String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
             String actual = indexerObjectMapper.readTree(resultJson).toPrettyString();
 
-            Assertions.assertEquals(expected, actual, "stac not equals for associated/self.json");
+            JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
+        } catch (JSONException e) {
+            throw new RuntimeException(e);
         } finally {
             deleteRecord(targetRecordId, parentId, siblingId, childId);
         }
@@ -193,9 +197,10 @@ public void verifyLogoLinkAddedOnIndex() throws IOException {
             String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
             String actual = indexerObjectMapper.readTree(test).toPrettyString();
 
-            Assertions.assertEquals(expected, actual, "Stac not equals for sample 5. Uuid: " + uuid);
-        }
-        finally {
+            JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
+        } catch (JSONException e) {
+            throw new RuntimeException(e);
+        } finally {
             deleteRecord(uuid);
         }
     }
@@ -219,9 +224,10 @@ public void verifyThumbnailLinkAddedOnIndex() throws IOException {
             String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
             String actual = indexerObjectMapper.readTree(test).toPrettyString();
 
-            Assertions.assertEquals(expected, actual, "Stac not equals for sample 6. Uuid: " + uuid);
-        }
-        finally {
+            JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
+        } catch (JSONException e) {
+            throw new RuntimeException(e);
+        } finally {
             deleteRecord(uuid);
         }
     }
@@ -247,9 +253,10 @@ public void verifyThumbnailLinkNullAddedOnIndex() throws IOException {
             String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
             String actual = indexerObjectMapper.readTree(test).toPrettyString();
 
-            Assertions.assertEquals(expected, actual, "Stac not equals for sample 7. Uuid: " + uuid);
-        }
-        finally {
+            JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
+        } catch (JSONException e) {
+            throw new RuntimeException(e);
+        } finally {
             deleteRecord(uuid);
         }
     }
@@ -260,22 +267,22 @@ public void verifyExtractedVocabsFromActualRecord() throws IOException {
         try {
             insertMetadataRecords(uuid, "classpath:canned/sample11.xml");
 
-            indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null);
+            indexerService.indexAllMetadataRecordsFromGeoNetwork(null,true, null);
             Hit<ObjectNode> objectNodeHit = indexerService.getDocumentByUUID(uuid);
 
             String test = String.valueOf(Objects.requireNonNull(objectNodeHit.source()));
             JsonNode rootNode = indexerObjectMapper.readTree(test);
 
             List<String> expectedParameterVocabs = Arrays.asList("oxygen", "alkalinity", "nutrient", "carbon", "salinity" );
-            List<String> actualParameterVocabs = indexerObjectMapper.convertValue(rootNode.path("summaries").path("parameter_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
+            List<String> actualParameterVocabs = indexerObjectMapper.convertValue(rootNode.path("search_suggestions").path("parameter_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
             Assertions.assertEquals(expectedParameterVocabs.size(), actualParameterVocabs.size(), "ParameterVocabs not equals for sample11.");
 
             List<String> expectedPlatformVocabs = List.of("small boat");
-            List<String> actualPlatformVocabs = indexerObjectMapper.convertValue(rootNode.path("summaries").path("platform_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
+            List<String> actualPlatformVocabs = indexerObjectMapper.convertValue(rootNode.path("search_suggestions").path("platform_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
             Assertions.assertEquals(expectedPlatformVocabs.size(), actualPlatformVocabs.size(), "PlatformVocabs not equals for sample11.");
 
             List<String> expectedOrganisationVocabs = List.of("national mooring network facility, integrated marine observing system (imos)");
-            List<String> actualOrganisationVocabs = indexerObjectMapper.convertValue(rootNode.path("summaries").path("parameter_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
+            List<String> actualOrganisationVocabs = indexerObjectMapper.convertValue(rootNode.path("search_suggestions").path("organisation_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
             Assertions.assertEquals(expectedOrganisationVocabs.size(), actualOrganisationVocabs.size(), "OrganisationVocabs not equals for sample11.");
         } finally {
             deleteRecord(uuid);
@@ -301,7 +308,7 @@ public void verifyAbstractPhrases() throws IOException {
 
             // Parse the JSON string into a JsonNode
             JsonNode rootNode = indexerObjectMapper.readTree(test);
-            JsonNode abstractPhrasesNode = rootNode.path("record_suggest").path("abstract_phrases");
+            JsonNode abstractPhrasesNode = rootNode.path("search_suggestions").path("abstract_phrases");
             List<String> actual = indexerObjectMapper.convertValue(abstractPhrasesNode, indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
 
             logger.info(test);