Skip to content

Commit

Permalink
Merge pull request #26 from aodn/encoding-right
Browse files Browse the repository at this point in the history
Fix to ingest when record contains nonvalid chars
  • Loading branch information
utas-raymondng authored Nov 15, 2023
2 parents 05fec85 + 1463088 commit fd75a91
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 31 deletions.
13 changes: 13 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@
<!-- <artifactId>spring-boot-configuration-processor</artifactId>-->
<!-- <optional>true</optional>-->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/commons-codec/commons-codec -->
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.16.0</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-security</artifactId>
Expand All @@ -64,6 +70,13 @@
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- https://mvnrepository.com/artifact/junit/junit -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import au.org.aodn.esindexer.model.*;
import au.org.aodn.esindexer.utils.BBoxUtils;
import au.org.aodn.esindexer.utils.GeometryUtils;
import au.org.aodn.esindexer.utils.StringUtil;
import au.org.aodn.esindexer.utils.TemporalUtils;
import au.org.aodn.metadata.iso19115_3_2018.*;
import jakarta.xml.bind.JAXBElement;
Expand Down Expand Up @@ -257,11 +258,11 @@ String mapTitle(MDMetadataType source) {
// TODO: Null or empty check
AbstractCitationType ac = i.getCitation().getAbstractCitation().getValue();
if(ac instanceof CICitationType2 type2) {
return type2.getTitle().getCharacterString().getValue().toString();
return StringUtil.toUTF8String(type2.getTitle().getCharacterString().getValue().toString());
}
else if(ac instanceof CICitationType type1) {
// Backward compatible
type1.getTitle().getCharacterString().getValue().toString();
return StringUtil.toUTF8String(type1.getTitle().getCharacterString().getValue().toString());
}
}
}
Expand All @@ -273,10 +274,10 @@ protected List<Map<String, String>> mapThemesConcepts(MDKeywordsPropertyType des
descriptiveKeyword.getMDKeywords().getKeyword().forEach(keyword -> {
if (keyword != null) {
if (keyword.getCharacterString().getValue() instanceof AnchorType value) {
keywords.add(Map.of("id", value.getValue(),
"url", value.getHref()));
keywords.add(Map.of("id", StringUtil.toUTF8String(value.getValue()),
"url", StringUtil.toUTF8String(value.getHref())));
} else {
keywords.add(Map.of("id", keyword.getCharacterString().getValue().toString()));
keywords.add(Map.of("id", StringUtil.toUTF8String(keyword.getCharacterString().getValue().toString())));
}
}
});
Expand All @@ -290,12 +291,12 @@ protected String mapThemesTitle(MDKeywordsPropertyType descriptiveKeyword, Strin
CharacterStringPropertyType titleString = thesaurusNameType2.getTitle();
if (titleString != null && titleString.getCharacterString().getValue() instanceof AnchorType value) {
if (value.getValue() != null) {
return value.getValue();
return StringUtil.toUTF8String(value.getValue());
} else {
return "";
}
} else if (titleString != null && titleString.getCharacterString().getValue() instanceof String value) {
return value;
return StringUtil.toUTF8String(value);
}
}
logger.debug("Unable to find themes' title for metadata record: " + uuid);
Expand All @@ -309,12 +310,12 @@ protected String mapThemesDescription(MDKeywordsPropertyType descriptiveKeyword,
CharacterStringPropertyType titleString = thesaurusNameType2.getTitle();
if (titleString != null && titleString.getCharacterString().getValue() instanceof AnchorType value) {
if (value.getTitleAttribute() != null) {
return value.getTitleAttribute();
return StringUtil.toUTF8String(value.getTitleAttribute());
} else {
return "";
}
} else if (titleString != null && titleString.getCharacterString().getValue() instanceof String value) {
return thesaurusNameType2.getAlternateTitle().stream().map(CharacterStringPropertyType::getCharacterString).map(JAXBElement::getValue).map(Object::toString).collect(Collectors.joining(", "));
return StringUtil.toUTF8String(thesaurusNameType2.getAlternateTitle().stream().map(CharacterStringPropertyType::getCharacterString).map(JAXBElement::getValue).map(Object::toString).collect(Collectors.joining(", ")));
}
}
logger.debug("Unable to find themes' description for metadata record: " + uuid);
Expand All @@ -325,7 +326,7 @@ protected String mapThemesScheme(MDKeywordsPropertyType descriptiveKeyword, Stri
AbstractCitationPropertyType abstractCitationPropertyType = descriptiveKeyword.getMDKeywords().getThesaurusName();
if (abstractCitationPropertyType != null) {
if (descriptiveKeyword.getMDKeywords().getType() != null) {
return descriptiveKeyword.getMDKeywords().getType().getMDKeywordTypeCode().getCodeListValue();
return StringUtil.toUTF8String(descriptiveKeyword.getMDKeywords().getType().getMDKeywordTypeCode().getCodeListValue());
} else {
return "";
}
Expand Down Expand Up @@ -370,7 +371,7 @@ List<LinkModel> mapLinks(MDMetadataType source) {
linkModel.setType(Objects.equals(ciOnlineResource.getProtocol().getCharacterString().getValue().toString(), "WWW:LINK-1.0-http--link") ? "text/html" : "");
linkModel.setHref(ciOnlineResource.getLinkage().getCharacterString().getValue().toString());
linkModel.setRel(AppConstants.RECOMMENDED_LINK_REL_TYPE);
linkModel.setTitle(ciOnlineResource.getName() != null ? ciOnlineResource.getName().getCharacterString().getValue().toString() : null);
linkModel.setTitle(ciOnlineResource.getName() != null ? StringUtil.toUTF8String(ciOnlineResource.getName().getCharacterString().getValue().toString()) : null);
results.add(linkModel);
}
}
Expand All @@ -393,7 +394,7 @@ String mapLicense(MDMetadataType source) {
legalConstraintsType.getOtherConstraints().forEach(otherConstraints -> {
for (String potentialKey : potentialKeys) {
if (otherConstraints.getCharacterString() != null && otherConstraints.getCharacterString().getValue().toString().toLowerCase().contains(potentialKey)) {
licenses.add(otherConstraints.getCharacterString().getValue().toString());
licenses.add(StringUtil.toUTF8String(otherConstraints.getCharacterString().getValue().toString()));
}
}
});
Expand All @@ -403,7 +404,7 @@ String mapLicense(MDMetadataType source) {
legalConstraintsType.getReference().forEach(reference -> {
if (reference.getAbstractCitation().getValue() instanceof CICitationType2 ciCitationType2) {
if (ciCitationType2.getTitle() != null) {
licenses.add(ciCitationType2.getTitle().getCharacterString().getValue().toString());
licenses.add(StringUtil.toUTF8String(ciCitationType2.getTitle().getCharacterString().getValue().toString()));
}
}
});
Expand Down Expand Up @@ -502,23 +503,23 @@ List<ContactsModel> mapContacts(MDMetadataType source) {

protected String mapContactsRole(CIResponsibilityType2 ciResponsibility) {
CodeListValueType roleCode = ciResponsibility.getRole().getCIRoleCode();
if (roleCode != null) { return roleCode.getCodeListValue(); } else { return ""; }
if (roleCode != null) { return StringUtil.toUTF8String(roleCode.getCodeListValue()); } else { return ""; }
}

protected String mapContactsOrganization(AbstractCIPartyPropertyType2 party) {
String organisationString = party.getAbstractCIParty().getValue().getName().getCharacterString().getValue().toString();
if (organisationString != null) { return organisationString; } else { return ""; }
if (organisationString != null) { return StringUtil.toUTF8String(organisationString); } else { return ""; }

}

protected String mapContactsName(CIIndividualPropertyType2 individual) {
CharacterStringPropertyType nameString = individual.getCIIndividual().getName();
if (nameString != null) { return individual.getCIIndividual().getName().getCharacterString().getValue().toString(); } else { return ""; }
if (nameString != null) { return StringUtil.toUTF8String(individual.getCIIndividual().getName().getCharacterString().getValue().toString()); } else { return ""; }
}

protected String mapContactsPosition(CIIndividualPropertyType2 individual) {
CharacterStringPropertyType positionString = individual.getCIIndividual().getPositionName();
if (positionString != null) { return individual.getCIIndividual().getPositionName().getCharacterString().getValue().toString(); } else { return ""; }
if (positionString != null) { return StringUtil.toUTF8String(individual.getCIIndividual().getPositionName().getCharacterString().getValue().toString()); } else { return ""; }
}

protected Map<String, Object> mapContactsAddress(CIAddressPropertyType2 address) {
Expand All @@ -527,28 +528,28 @@ protected Map<String, Object> mapContactsAddress(CIAddressPropertyType2 address)

address.getCIAddress().getDeliveryPoint().forEach(deliveryPoint -> {
String deliveryPointString = deliveryPoint.getCharacterString().getValue().toString();
deliveryPoints.add(deliveryPointString != null ? deliveryPointString : "");
deliveryPoints.add(deliveryPointString != null ? StringUtil.toUTF8String(deliveryPointString) : "");
});
addressItem.put("deliveryPoint", deliveryPoints);

CharacterStringPropertyType cityString = address.getCIAddress().getCity();
addressItem.put("city", cityString != null ? cityString.getCharacterString().getValue().toString() : "");
addressItem.put("city", cityString != null ? StringUtil.toUTF8String(cityString.getCharacterString().getValue().toString()) : "");

CharacterStringPropertyType administrativeAreaString = address.getCIAddress().getAdministrativeArea();
addressItem.put("administrativeArea", administrativeAreaString != null ? administrativeAreaString.getCharacterString().getValue().toString() : "");
addressItem.put("administrativeArea", administrativeAreaString != null ? StringUtil.toUTF8String(administrativeAreaString.getCharacterString().getValue().toString()) : "");

CharacterStringPropertyType postalCodeString = address.getCIAddress().getPostalCode();
addressItem.put("postalCode", postalCodeString != null ? postalCodeString.getCharacterString().getValue().toString() : "");
addressItem.put("postalCode", postalCodeString != null ? StringUtil.toUTF8String(postalCodeString.getCharacterString().getValue().toString()) : "");

CharacterStringPropertyType countryString = address.getCIAddress().getCountry();
addressItem.put("country", countryString != null ? countryString.getCharacterString().getValue().toString() : "");
addressItem.put("country", countryString != null ? StringUtil.toUTF8String(countryString.getCharacterString().getValue().toString()) : "");

return addressItem;
}

protected String mapContactsEmail(CharacterStringPropertyType electronicMailAddress) {
if (electronicMailAddress != null) {
return electronicMailAddress.getCharacterString().getValue().toString();
return StringUtil.toUTF8String(electronicMailAddress.getCharacterString().getValue().toString());
} else {
return "";
}
Expand All @@ -558,10 +559,10 @@ protected Map<String, String> mapContactsPhone(CITelephonePropertyType2 phone) {
Map<String, String> phoneItem = new HashMap<>();

CharacterStringPropertyType phoneString = phone.getCITelephone().getNumber();
phoneItem.put("value", phoneString != null ? phoneString.getCharacterString().getValue().toString() : "");
phoneItem.put("value", phoneString != null ? StringUtil.toUTF8String(phoneString.getCharacterString().getValue().toString()) : "");

CodeListValueType phoneCode = phone.getCITelephone().getNumberType().getCITelephoneTypeCode();
phoneItem.put("roles", phoneCode != null ? phoneCode.getCodeListValue() : "");
phoneItem.put("roles", phoneCode != null ? StringUtil.toUTF8String(phoneCode.getCodeListValue()) : "");

return phoneItem;
}
Expand All @@ -570,13 +571,13 @@ protected Map<String, String> mapContactsOnlineResource(CIOnlineResourceProperty
Map<String, String> onlineResourceItem = new HashMap<>();

CharacterStringPropertyType linkString = onlineResource.getCIOnlineResource().getLinkage();
onlineResourceItem.put("href", linkString != null ? linkString.getCharacterString().getValue().toString() : "");
onlineResourceItem.put("href", linkString != null ? StringUtil.toUTF8String(linkString.getCharacterString().getValue().toString()) : "");

CharacterStringPropertyType resourceNameString = onlineResource.getCIOnlineResource().getName();
onlineResourceItem.put("title", resourceNameString != null ? resourceNameString.getCharacterString().getValue().toString() : "");
onlineResourceItem.put("title", resourceNameString != null ? StringUtil.toUTF8String(resourceNameString.getCharacterString().getValue().toString()) : "");

CharacterStringPropertyType linkTypeString = onlineResource.getCIOnlineResource().getProtocol();
onlineResourceItem.put("type", linkTypeString != null ? linkTypeString.getCharacterString().getValue().toString() : "");
onlineResourceItem.put("type", linkTypeString != null ? StringUtil.toUTF8String(linkTypeString.getCharacterString().getValue().toString()) : "");

return onlineResourceItem;
}
Expand Down Expand Up @@ -612,7 +613,7 @@ protected List<LanguageModel> mapLanguages(MDMetadataType source) {

protected String mapLanguagesCode(MDDataIdentificationType i) {
try {
return i.getDefaultLocale().getPTLocale().getValue().getLanguage().getLanguageCode().getCodeListValue();
return StringUtil.toUTF8String(i.getDefaultLocale().getPTLocale().getValue().getLanguage().getLanguageCode().getCodeListValue());
} catch (NullPointerException e) {
return null;
}
Expand Down Expand Up @@ -646,8 +647,10 @@ protected <R> R createGeometryItems(
.map(AbstractEXGeographicExtentPropertyType::getAbstractEXGeographicExtent)
.filter(m -> m.getValue() instanceof EXBoundingPolygonType || m.getValue() instanceof EXGeographicBoundingBoxType)
.map(m -> {
if (m.getValue() instanceof EXBoundingPolygonType) {
return (EXBoundingPolygonType) m.getValue();
if (m.getValue() instanceof EXBoundingPolygonType exBoundingPolygonType) {
if (!exBoundingPolygonType.getPolygon().isEmpty() && exBoundingPolygonType.getPolygon().get(0).getAbstractGeometry() != null) {
return exBoundingPolygonType;
}
} else if (m.getValue() instanceof EXGeographicBoundingBoxType) {
return (EXGeographicBoundingBoxType) m.getValue();
}
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/au/org/aodn/esindexer/utils/StringUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package au.org.aodn.esindexer.utils;

import java.nio.charset.StandardCharsets;

/**
 * String helper utilities.
 */
public class StringUtil {
    /**
     * Normalises a string by round-tripping it through a UTF-8 encode/decode.
     *
     * <p>Per {@link String#getBytes(java.nio.charset.Charset)}, any character
     * sequence that cannot be encoded (for example an unpaired surrogate) is
     * replaced with the charset's replacement bytes, which for UTF-8 is
     * {@code '?'}. Well-formed text is returned with identical content.
     *
     * @param input the text to normalise; may be {@code null}
     * @return the normalised text, or {@code null} when {@code input} is {@code null}
     */
    public static String toUTF8String(String input) {
        if (input == null) {
            // Pass null through so optional fields do not raise a NullPointerException here.
            return null;
        }
        byte[] utf8Bytes = input.getBytes(StandardCharsets.UTF_8);
        // Decode with the same charset used to encode. The single-argument
        // String(byte[]) constructor uses the platform default charset, which
        // corrupts non-ASCII characters whenever that default is not UTF-8.
        return new String(utf8Bytes, StandardCharsets.UTF_8);
    }
}
27 changes: 27 additions & 0 deletions src/test/StringUtilTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import au.org.aodn.esindexer.utils.StringUtil;

import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Unit tests for {@link StringUtil#toUTF8String(String)}.
 *
 * <p>Each case checks that the given text comes back unchanged from the conversion.
 */
public class StringUtilTest {
    /** Plain ASCII text must be returned as-is. */
    @Test
    public void testToUTF8String_withAsciiString() {
        final String expected = "Hello World";
        assertEquals(expected, StringUtil.toUTF8String(expected),
                "The UTF-8 conversion of an ASCII string should not change the string");
    }

    /** Accented Latin characters must be returned as-is. */
    @Test
    public void testToUTF8String_withFrenchCharacters() {
        final String expected = "Bonjour le monde! Ça va?";
        assertEquals(expected, StringUtil.toUTF8String(expected),
                "The UTF-8 conversion of a string with French characters should not change the string");
    }

    /** The degree sign must be returned as-is. */
    @Test
    public void testToUTF8String_withDegreeSign() {
        final String expected = "Temperature: 25°C";
        assertEquals(expected, StringUtil.toUTF8String(expected),
                "The UTF-8 conversion of a string with a degree sign should not change the string");
    }
}

0 comments on commit fd75a91

Please sign in to comment.