Merge pull request #29 from aodn/ranking-by-completeness
Ranking by completeness
utas-raymondng authored Nov 23, 2023
2 parents 43bd250 + 6fea57c commit a13a849
Showing 4 changed files with 127 additions and 19 deletions.
43 changes: 31 additions & 12 deletions src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java
@@ -32,7 +32,7 @@
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;

import au.org.aodn.esindexer.model.StacCollectionModel;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -59,6 +59,9 @@ public class IndexerServiceImpl implements IndexerService {
@Autowired
JaxbUtils<MDMetadataType> jaxbUtils;

@Autowired
RankingService rankingService;

private static final Logger logger = LoggerFactory.getLogger(IndexerServiceImpl.class);

protected long getDocumentsCount() {
@@ -127,14 +130,33 @@ protected void deleteIndexStore() {
}
}

protected JSONObject getMappedMetadataValues(String metadataValues) throws IOException, FactoryException, TransformException, JAXBException {
MDMetadataType metadataType = jaxbUtils.unmarshal(metadataValues);

StacCollectionModel stacCollectionModel = mapper.mapToSTACCollection(metadataType);

// evaluate completeness
Integer completeness = rankingService.evaluateCompleteness(stacCollectionModel);
// TODO: in the future, evaluate other aspects of the data, such as relevance and quality, using NLP

/* The score can later be expanded with other aspects of the data, such as relevance, quality, etc.
* The maximum can stay at 100 points by dividing the summed aspect scores by the number of aspects (rounded to the nearest integer),
* given that the maximum score for each aspect is 100.
* e.g. completeness = 80, relevance = 90, quality = 100
* final score = (80 + 90 + 100) / 3 = 90
*/
Integer score = completeness;

stacCollectionModel.getSummaries().setScore(score);

return new JSONObject(objectMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(stacCollectionModel));
}

public ResponseEntity<String> indexMetadata(String metadataValues) {
try {
JSONObject mappedMetadataValues;
JSONObject mappedMetadataValues = this.getMappedMetadataValues(metadataValues);
IndexRequest<JsonData> req;
MDMetadataType metadataType = jaxbUtils.unmarshal(metadataValues);
mappedMetadataValues = new JSONObject(objectMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(mapper.mapToSTACCollection(metadataType)));

String uuid = mappedMetadataValues.getString("id");
long portalIndexDocumentsCount;
@@ -231,16 +253,13 @@ public ResponseEntity<String> indexAllMetadataRecordsFromGeoNetwork(boolean conf

for (String metadataRecord : geoNetworkResourceService.getAllMetadataRecords()) {
try {
MDMetadataType metadataType = jaxbUtils.unmarshal(metadataRecord);
// get mapped metadata values from GeoNetwork to STAC collection schema
JSONObject mappedRecord = new JSONObject(objectMapper
.writerWithDefaultPrettyPrinter()
.writeValueAsString(mapper.mapToSTACCollection(metadataType)));
JSONObject mappedMetadataValues = this.getMappedMetadataValues(metadataRecord);

logger.debug("Final output json is {}", mappedRecord);
logger.debug("Final output json is {}", mappedMetadataValues);

// convert mapped values to binary data
ByteArrayInputStream input = new ByteArrayInputStream(mappedRecord.toString().getBytes());
ByteArrayInputStream input = new ByteArrayInputStream(mappedMetadataValues.toString().getBytes());
BinaryData data = BinaryData.of(IOUtils.toByteArray(input), ContentType.APPLICATION_JSON);

// send bulk request to Elasticsearch
@@ -251,7 +270,7 @@
)
);

logger.info("Ingested a new metadata document with UUID: " + mappedRecord.getString("id"));
logger.info("Ingested a new metadata document with UUID: " + mappedMetadataValues.getString("id"));

} catch (FactoryException | JAXBException | TransformException e) {
/* it will reach here if it cannot extract values of all the keys in the GeoNetwork metadata JSON
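A note on the scoring flow introduced in getMappedMetadataValues above: only completeness is evaluated in this commit, so the final score is simply that value. A rough sketch of how the averaging described in the comment could look once further aspects are scored is shown below; evaluateRelevance and evaluateQuality are hypothetical methods that do not exist in this codebase and are used only for illustration.

    // Illustration only: evaluateRelevance and evaluateQuality are hypothetical,
    // not part of this commit; only evaluateCompleteness exists.
    Integer completeness = rankingService.evaluateCompleteness(stacCollectionModel);
    Integer relevance = rankingService.evaluateRelevance(stacCollectionModel);
    Integer quality = rankingService.evaluateQuality(stacCollectionModel);

    // Each aspect is scored out of 100, so averaging keeps the result in the 0-100 range,
    // e.g. (80 + 90 + 100) / 3 = 90.
    Integer score = Math.round((completeness + relevance + quality) / 3.0f);
    stacCollectionModel.getSummaries().setScore(score);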
7 changes: 7 additions & 0 deletions src/main/java/au/org/aodn/esindexer/service/RankingService.java
@@ -0,0 +1,7 @@
package au.org.aodn.esindexer.service;

import au.org.aodn.esindexer.model.StacCollectionModel;

public interface RankingService {
    public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel);
}
87 changes: 87 additions & 0 deletions src/main/java/au/org/aodn/esindexer/service/RankingServiceImpl.java
@@ -0,0 +1,87 @@
package au.org.aodn.esindexer.service;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import au.org.aodn.esindexer.model.StacCollectionModel;

@Service
public class RankingServiceImpl implements RankingService {

    protected static Logger logger = LoggerFactory.getLogger(RankingServiceImpl.class);

    public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) {
        Integer total = 0;

        /*
         * The implementation of this method can be adjusted.
         * The current scoring system is as follows (I made it up; feel free to change it):
         * 1. 15 points for title
         * 2. 15 points for description
         * 3. 10 points for extent geometry
         * 4. 10 points for extent temporal
         * 5a. 10 points for links with just 1-2 links
         * 5b. 15 points for links with 3-5 links
         * 5c. 20 points for links with more than 5 links
         * 6a. 10 points for themes with just 1-2 themes
         * 6b. 15 points for themes with 3-5 themes
         * 6c. 20 points for themes with more than 5 themes
         * 7. 10 points for contacts
         * Total: 100 points
         */

        if (stacCollectionModel.getTitle() != null) {
            logger.debug("Title found");
            total += 15;
        }

        if (stacCollectionModel.getDescription() != null) {
            logger.debug("Description found");
            total += 15;
        }

        if (stacCollectionModel.getExtent().getBbox() != null) {
            logger.debug("Extent found");
            total += 10;
        }

        if (stacCollectionModel.getExtent().getTemporal() != null) {
            logger.debug("Temporal found");
            total += 10;
        }

        if (stacCollectionModel.getLinks() != null) {
            if (stacCollectionModel.getLinks().size() <= 2) {
                logger.debug("Links found with size: " + stacCollectionModel.getLinks().size());
                total += 10;
            } else if (stacCollectionModel.getLinks().size() <= 5) {
                logger.debug("Links found with size: " + stacCollectionModel.getLinks().size());
                total += 15;
            } else {
                logger.debug("Links found with size: " + stacCollectionModel.getLinks().size());
                total += 20;
            }
        }

        if (stacCollectionModel.getThemes() != null) {
            if (stacCollectionModel.getThemes().size() <= 2) {
                logger.debug("Themes found with size: " + stacCollectionModel.getThemes().size());
                total += 10;
            } else if (stacCollectionModel.getThemes().size() <= 5) {
                logger.debug("Themes found with size: " + stacCollectionModel.getThemes().size());
                total += 15;
            } else {
                logger.debug("Themes found with size: " + stacCollectionModel.getThemes().size());
                total += 20;
            }
        }

        if (stacCollectionModel.getContacts() != null) {
            logger.debug("Contacts found");
            total += 10;
        }

        return total;
    }
}
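As a quick sanity check of the rubric above: a record with a title, a description, a bounding box, a temporal extent, four links, six themes and a contact scores 15 + 15 + 10 + 10 + 15 + 20 + 10 = 95 out of the maximum 100 (reaching 100 requires more than five links and more than five themes). Note that getExtent() itself is never null-checked, so a collection without an extent object would throw a NullPointerException rather than simply losing those points. The 1-2 / 3-5 / more-than-5 banding is also written out twice, once for links and once for themes; a small helper along the following lines could share it. This is only a sketch of a possible refactor, not something in this commit.

    // Possible refactor (not in this commit): shared banding for count-based fields.
    private int scoreByCount(int count) {
        if (count <= 2) {
            return 10;
        } else if (count <= 5) {
            return 15;
        }
        return 20;
    }

    // Usage inside evaluateCompleteness would then be, for example:
    // if (stacCollectionModel.getLinks() != null) {
    //     total += scoreByCount(stacCollectionModel.getLinks().size());
    // }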
9 changes: 2 additions & 7 deletions src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperServiceImpl.java
@@ -14,6 +14,7 @@
import org.mapstruct.Named;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

@@ -34,7 +35,6 @@ public abstract class StacCollectionMapperServiceImpl implements StacCollectionM
@Mapping(target="uuid", source = "source", qualifiedByName = "mapUUID")
@Mapping(target="title", source = "source", qualifiedByName = "mapTitle" )
@Mapping(target="description", source = "source", qualifiedByName = "mapDescription")
@Mapping(target="summaries.score", source = "source", qualifiedByName = "mapSummaries.score")
@Mapping(target="summaries.status", source = "source", qualifiedByName = "mapSummaries.status")
@Mapping(target="summaries.scope", source = "source", qualifiedByName = "mapSummaries.scope")
@Mapping(target="summaries.geometry", source = "source", qualifiedByName = "mapSummaries.geometry")
@@ -49,6 +49,7 @@ public abstract class StacCollectionMapperServiceImpl implements StacCollectionM
@Mapping(target="providers", source = "source", qualifiedByName = "mapProviders")
public abstract StacCollectionModel mapToSTACCollection(MDMetadataType source);


private static final Logger logger = LoggerFactory.getLogger(StacCollectionMapperServiceImpl.class);

@Value("${spring.jpa.properties.hibernate.jdbc.time_zone}")
@@ -201,12 +202,6 @@ Map mapSummariesGeometry(MDMetadataType source) {
);
}

@Named("mapSummaries.score")
Integer createSummariesScore(MDMetadataType source) {
//TODO: need cal logic
return 0;
}

@Named("mapSummaries.status")
String createSummariesStatus(MDMetadataType source) {
List<MDDataIdentificationType> items = findMDDataIdentificationType(source);
