Skip to content

Commit

Permalink
Update schema and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
vietnguyengit committed Sep 3, 2024
1 parent eb89193 commit 443acfd
Show file tree
Hide file tree
Showing 32 changed files with 855 additions and 1,066 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ public ResponseEntity<List<JsonNode>> getOrganisationVocabsFromArdc() {
/**
 * Clears the cached vocabs and then repopulates the vocabs index.
 *
 * <p>Exposed as {@code GET /vocabs/populate}; the OpenAPI metadata declares the
 * {@code X-API-Key} security requirement for it.
 *
 * @return a response with status {@code 204 NO_CONTENT} and a short confirmation message as body
 * @throws IOException if one of the underlying vocab service calls fails with an I/O error
 */
@GetMapping(path="/vocabs/populate")
@Operation(security = { @SecurityRequirement(name = "X-API-Key") }, description = "Populate data to the vocabs index")
public ResponseEntity<String> populateDataToVocabsIndex() throws IOException {
    // Step 1: clear the parameter, platform and organisation vocab caches.
    vocabService.clearParameterVocabCache();
    vocabService.clearPlatformVocabCache();
    vocabService.clearOrganisationVocabCache();

    // Step 2: populate the index with new data.
    vocabService.populateVocabsData();

    // NOTE(review): a 204 response is not expected to carry a body, so clients may
    // never see this message - confirm whether 200 OK was intended.
    final String confirmation = "Populated data to the vocabs index";
    return ResponseEntity
            .status(HttpStatus.NO_CONTENT)
            .body(confirmation);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import au.org.aodn.esindexer.exception.*;
import au.org.aodn.esindexer.utils.JaxbUtils;
import au.org.aodn.metadata.iso19115_3_2018.MDMetadataType;
import au.org.aodn.stac.model.RecordSuggest;
import au.org.aodn.stac.model.SearchSuggestionsModel;
import au.org.aodn.stac.model.StacCollectionModel;
import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.elasticsearch._types.ElasticsearchException;
Expand Down Expand Up @@ -44,7 +44,6 @@

@Slf4j
@Service
@Slf4j
public class IndexerServiceImpl implements IndexerService {

protected String indexName;
Expand Down Expand Up @@ -172,36 +171,24 @@ protected StacCollectionModel getMappedMetadataValues(String metadataValues) thr

// parameter vocabs
List<String> processedParameterVocabs = vocabService.extractVocabLabelsFromThemes(stacCollectionModel.getThemes(), AppConstants.AODN_DISCOVERY_PARAMETER_VOCABS);
if (!processedParameterVocabs.isEmpty()) {
stacCollectionModel.getSummaries().setParameterVocabs(processedParameterVocabs);
}

/*
NOTE: The following implementation for platform and organization vocabularies is just a placeholder, not the final version.
It follows the same logic as what we intended for the parameter vocabulary, where we extract the list of second-level vocabularies that a record belongs to from its bottom-level vocabularies.
// NOTE: The following implementation for platform and organization vocabularies is just a placeholder, not the final version.
// It follows the same logic as what we intended for the parameter vocabulary, where we extract the list of second-level vocabularies that a record belongs to from its bottom-level vocabularies.
// TODO: Adjust if necessary, or remove the above comments after making a final decision.
--------------BEGIN--------------
*/
// platform vocabs
List<String> processedPlatformVocabs = vocabService.extractVocabLabelsFromThemes(stacCollectionModel.getThemes(), AppConstants.AODN_PLATFORM_VOCABS);
if (!processedPlatformVocabs.isEmpty()) {
stacCollectionModel.getSummaries().setPlatformVocabs(processedPlatformVocabs);
}
// organisation vocabs
List<String> processedOrganisationVocabs = vocabService.extractVocabLabelsFromThemes(stacCollectionModel.getThemes(), AppConstants.AODN_ORGANISATION_VOCABS);
if (!processedOrganisationVocabs.isEmpty()) {
stacCollectionModel.getSummaries().setOrganisationVocabs(processedOrganisationVocabs);
}
/*
--------------END--------------
*/

// categories suggest using a different index
// extendable to other aspects of the record's data, e.g. title, description, etc. - anything that is unique to the record and currently uses the "text" type
RecordSuggest recordSuggest = RecordSuggest.builder()
SearchSuggestionsModel searchSuggestionsModel = SearchSuggestionsModel.builder()
.abstractPhrases(this.extractTokensFromDescription(stacCollectionModel.getDescription()))
.parameterVocabs(!processedParameterVocabs.isEmpty() ? processedParameterVocabs : null)
.platformVocabs(!processedPlatformVocabs.isEmpty() ? processedPlatformVocabs : null)
.organisationVocabs(!processedOrganisationVocabs.isEmpty() ? processedOrganisationVocabs : null)
.build();
stacCollectionModel.setRecordSuggest(recordSuggest);
stacCollectionModel.setSearchSuggestionsModel(searchSuggestionsModel);

return stacCollectionModel;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,13 +364,12 @@ public List<String> extractVocabLabelsFromThemes(List<ThemesModel> themes, Strin
};

if (!vocabs.isEmpty() && !themes.isEmpty()) {
themes.stream().filter(Objects::nonNull).forEach(theme -> {
vocabs.stream().filter(Objects::nonNull).forEach(topLevelVocab -> {
if (topLevelVocab.has("narrower") && !topLevelVocab.get("narrower").isEmpty()) {
for (JsonNode secondLevelVocab : topLevelVocab.get("narrower")) {
if (secondLevelVocab != null && secondLevelVocab.has("label") && secondLevelVocab.has("about")) {
String secondLevelVocabLabel = secondLevelVocab.get("label").asText().toLowerCase();

vocabs.stream().filter(Objects::nonNull).forEach(topLevelVocab -> {
if (topLevelVocab.has("narrower") && !topLevelVocab.get("narrower").isEmpty()) {
for (JsonNode secondLevelVocab : topLevelVocab.get("narrower")) {
if (secondLevelVocab != null && secondLevelVocab.has("label") && secondLevelVocab.has("about")) {
String secondLevelVocabLabel = secondLevelVocab.get("label").asText().toLowerCase();
themes.stream().filter(Objects::nonNull).forEach(theme -> {
ConceptModel secondLevelVocabAsConcept = ConceptModel.builder()
.id(secondLevelVocab.get("label").asText())
.url(secondLevelVocab.get("about").asText())
Expand All @@ -379,7 +378,6 @@ public List<String> extractVocabLabelsFromThemes(List<ThemesModel> themes, Strin
// if the record's theme is already second-level vocab, no need to further check
if (themeMatchConcept(theme, secondLevelVocabAsConcept) && !results.contains(secondLevelVocabLabel)) {
results.add(secondLevelVocabLabel);
break;
}

// if the record's theme is leaf-node (bottom-level vocab)
Expand All @@ -401,10 +399,10 @@ public List<String> extractVocabLabelsFromThemes(List<ThemesModel> themes, Strin
}
}
}
}
});
}
}
});
}
});
}
return results;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,15 @@
}
}
},
"record_suggest": {
"search_suggestions": {
"type": "nested",
"properties": {
"abstract_phrases": { "type": "search_as_you_type", "analyzer": "custom_analyser" }
"abstract_phrases": { "type": "search_as_you_type", "analyzer": "custom_analyser" },
"parameter_vocabs": { "type": "search_as_you_type", "analyzer": "custom_analyser" },
"platform_vocabs": { "type": "search_as_you_type", "analyzer": "custom_analyser" },
"organisation_vocabs": { "type": "search_as_you_type", "analyzer": "custom_analyser" }
}
},
"parameter_vocabs" : { "type": "keyword" },
"keywords": {
"type": "nested",
"properties": {
Expand Down
84 changes: 61 additions & 23 deletions indexer/src/main/resources/config_files/vocabs_index_schema.json
Original file line number Diff line number Diff line change
@@ -1,49 +1,87 @@
{
"settings":{
"analysis":{
"analyzer":{
"custom_analyser":{
"type":"custom",
"tokenizer":"standard",
"filter":[
"lowercase",
"english_stop"
]
}
},
"filter":{
"english_stop":{
"type":"stop",
"stopwords":"_english_"
}
}
}
},
"mappings": {
"dynamic": true,
"properties": {
"parameter_vocab": {
"properties": {
"label": {
"type": "search_as_you_type",
"analyzer": "custom_analyser"
"type": "text"
},
"definition": {
"type": "text"
},
"about": {
"type": "keyword"
},
"narrower": {
"type": "nested",
"properties": {
"label": {
"type": "text"
},
"about": {
"type": "keyword"
},
"narrower": {
"type": "nested",
"properties": {
"label": {
"type": "text"
},
"about": {
"type": "keyword"
}
}
}
}
}
}
},
"platform_vocab": {
"properties": {
"label": {
"type": "text"
},
"definition": {
"type": "text"
},
"about": {
"type": "keyword"
},
"broader": {
"narrower": {
"type": "nested",
"properties": {
"label": {
"type": "text"
},
"about": {
"type": "keyword"
},
"narrower": {
"type": "nested",
"properties": {
"label": {
"type": "text"
},
"about": {
"type": "keyword"
}
}
}
}
}
}
},
"organisation_vocab": {
"properties": {
"label": {
"type": "text"
},
"definition": {
"type": "text"
},
"about": {
"type": "keyword"
},
"narrower": {
"type": "nested",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,6 @@ public void verifyInsertMetadataWorks() throws IOException {

logger.debug("Get count in verifyInsertMetadataWorks");

Assertions.assertFalse(geoNetworkService.isMetadataRecordsCountLessThan(1), "Compare false");
Assertions.assertTrue(geoNetworkService.isMetadataRecordsCountLessThan(2), "Compare true");

Iterable<String> i = geoNetworkService.getAllMetadataRecords(null);

for (String x : i) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.json.JSONException;
import org.junit.jupiter.api.*;
import org.skyscreamer.jsonassert.JSONAssert;
import org.skyscreamer.jsonassert.JSONCompareMode;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
Expand All @@ -19,8 +22,6 @@
import java.util.List;
import java.util.Objects;

import static au.org.aodn.esindexer.utils.CommonUtils.persevere;

@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@ActiveProfiles("test")
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
Expand Down Expand Up @@ -134,9 +135,10 @@ public void verifyGetDocumentByUUID() throws IOException {
String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
String actual = indexerObjectMapper.readTree(test).toPrettyString();

Assertions.assertEquals(expected, actual, "Stac not equals for sample 4. Uuid:" + uuid);
}
finally {
JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
} catch (JSONException e) {
throw new RuntimeException(e);
} finally {
deleteRecord(uuid);
}
}
Expand Down Expand Up @@ -164,7 +166,9 @@ public void verifyAssociatedRecordIndexer() throws IOException{
String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
String actual = indexerObjectMapper.readTree(resultJson).toPrettyString();

Assertions.assertEquals(expected, actual, "stac not equals for associated/self.json");
JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
} catch (JSONException e) {
throw new RuntimeException(e);
} finally {
deleteRecord(targetRecordId, parentId, siblingId, childId);
}
Expand Down Expand Up @@ -193,9 +197,10 @@ public void verifyLogoLinkAddedOnIndex() throws IOException {
String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
String actual = indexerObjectMapper.readTree(test).toPrettyString();

Assertions.assertEquals(expected, actual, "Stac not equals for sample 5. Uuid: " + uuid);
}
finally {
JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
} catch (JSONException e) {
throw new RuntimeException(e);
} finally {
deleteRecord(uuid);
}
}
Expand All @@ -219,9 +224,10 @@ public void verifyThumbnailLinkAddedOnIndex() throws IOException {
String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
String actual = indexerObjectMapper.readTree(test).toPrettyString();

Assertions.assertEquals(expected, actual, "Stac not equals for sample 6. Uuid: " + uuid);
}
finally {
JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
} catch (JSONException e) {
throw new RuntimeException(e);
} finally {
deleteRecord(uuid);
}
}
Expand All @@ -247,9 +253,10 @@ public void verifyThumbnailLinkNullAddedOnIndex() throws IOException {
String expected = indexerObjectMapper.readTree(expectedData).toPrettyString();
String actual = indexerObjectMapper.readTree(test).toPrettyString();

Assertions.assertEquals(expected, actual, "Stac not equals for sample 7. Uuid: " + uuid);
}
finally {
JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT);
} catch (JSONException e) {
throw new RuntimeException(e);
} finally {
deleteRecord(uuid);
}
}
Expand All @@ -260,22 +267,22 @@ public void verifyExtractedVocabsFromActualRecord() throws IOException {
try {
insertMetadataRecords(uuid, "classpath:canned/sample11.xml");

indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null);
indexerService.indexAllMetadataRecordsFromGeoNetwork(null,true, null);
Hit<ObjectNode> objectNodeHit = indexerService.getDocumentByUUID(uuid);

String test = String.valueOf(Objects.requireNonNull(objectNodeHit.source()));
JsonNode rootNode = indexerObjectMapper.readTree(test);

List<String> expectedParameterVocabs = Arrays.asList("oxygen", "alkalinity", "nutrient", "carbon", "salinity" );
List<String> actualParameterVocabs = indexerObjectMapper.convertValue(rootNode.path("summaries").path("parameter_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
List<String> actualParameterVocabs = indexerObjectMapper.convertValue(rootNode.path("search_suggestions").path("parameter_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
Assertions.assertEquals(expectedParameterVocabs.size(), actualParameterVocabs.size(), "ParameterVocabs not equals for sample11.");

List<String> expectedPlatformVocabs = List.of("small boat");
List<String> actualPlatformVocabs = indexerObjectMapper.convertValue(rootNode.path("summaries").path("platform_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
List<String> actualPlatformVocabs = indexerObjectMapper.convertValue(rootNode.path("search_suggestions").path("platform_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
Assertions.assertEquals(expectedPlatformVocabs.size(), actualPlatformVocabs.size(), "PlatformVocabs not equals for sample11.");

List<String> expectedOrganisationVocabs = List.of("national mooring network facility, integrated marine observing system (imos)");
List<String> actualOrganisationVocabs = indexerObjectMapper.convertValue(rootNode.path("summaries").path("parameter_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
List<String> actualOrganisationVocabs = indexerObjectMapper.convertValue(rootNode.path("search_suggestions").path("organisation_vocabs"), indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
Assertions.assertEquals(expectedOrganisationVocabs.size(), actualOrganisationVocabs.size(), "OrganisationVocabs not equals for sample11.");
} finally {
deleteRecord(uuid);
Expand All @@ -301,7 +308,7 @@ public void verifyAbstractPhrases() throws IOException {

// Parse the JSON string into a JsonNode
JsonNode rootNode = indexerObjectMapper.readTree(test);
JsonNode abstractPhrasesNode = rootNode.path("record_suggest").path("abstract_phrases");
JsonNode abstractPhrasesNode = rootNode.path("search_suggestions").path("abstract_phrases");
List<String> actual = indexerObjectMapper.convertValue(abstractPhrasesNode, indexerObjectMapper.getTypeFactory().constructCollectionType(List.class, String.class));

logger.info(test);
Expand Down
Loading

0 comments on commit 443acfd

Please sign in to comment.