-
Notifications
You must be signed in to change notification settings - Fork 217
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #74 from Netflix/dupedetect
Duplicate detection validator. Based on the primary key definition, it detects whether duplicate values were added for a particular type. Used for bad data detection and validation.
- Loading branch information
Showing
2 changed files
with
203 additions
and
0 deletions.
There are no files selected for viewing
103 changes: 103 additions & 0 deletions
103
...main/java/com/netflix/hollow/api/producer/validation/DuplicateDataDetectionValidator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
/* | ||
* | ||
* Copyright 2017 Netflix, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
package com.netflix.hollow.api.producer.validation; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collection; | ||
|
||
import com.netflix.hollow.api.producer.HollowProducer.ReadState; | ||
import com.netflix.hollow.api.producer.HollowProducer.Validator; | ||
import com.netflix.hollow.core.index.HollowPrimaryKeyIndex; | ||
import com.netflix.hollow.core.index.key.PrimaryKey; | ||
import com.netflix.hollow.core.schema.HollowObjectSchema; | ||
import com.netflix.hollow.core.schema.HollowSchema; | ||
import com.netflix.hollow.core.schema.HollowSchema.SchemaType; | ||
|
||
/** | ||
* | ||
* @author lkanchanapalli | ||
* | ||
*/ | ||
public class DuplicateDataDetectionValidator implements Validator { | ||
String dataTypeName; | ||
private String[] fieldPathNames; | ||
|
||
/** | ||
* @param dataTypeName for which this duplicate data detection is needed. | ||
*/ | ||
public DuplicateDataDetectionValidator(String dataTypeName) { | ||
this.dataTypeName = dataTypeName; | ||
this.fieldPathNames = null; | ||
} | ||
|
||
/** | ||
* | ||
* @param dataTypeName: for which this duplicate data detection is needed. | ||
* @param fieldPathNames: field paths that defined a primary key | ||
*/ | ||
public DuplicateDataDetectionValidator(String dataTypeName, String[] fieldPathNames) { | ||
this.dataTypeName = dataTypeName; | ||
this.fieldPathNames = fieldPathNames; | ||
} | ||
|
||
|
||
/* (non-Javadoc) | ||
* @see com.netflix.hollow.api.producer.HollowProducer.Validator#validate(com.netflix.hollow.api.producer.HollowProducer.ReadState) | ||
*/ | ||
@Override | ||
public void validate(ReadState readState) { | ||
PrimaryKey primaryKey = getPrimaryKey(readState); | ||
HollowPrimaryKeyIndex hollowPrimaryKeyIndex = new HollowPrimaryKeyIndex(readState.getStateEngine(), primaryKey); | ||
Collection<Object[]> duplicateKeys = hollowPrimaryKeyIndex.getDuplicateKeys(); | ||
|
||
if(duplicateKeys != null && !duplicateKeys.isEmpty()){ | ||
String duplicateIds = getDuplicateIDsString(duplicateKeys); | ||
String errorMsg = String.format("Duplicate keys found for type %s. Unique key is defined as %s. Duplicate IDs are: %s", dataTypeName, | ||
Arrays.toString(primaryKey.getFieldPaths()), duplicateIds); | ||
throw new ValidationException(errorMsg); | ||
} | ||
} | ||
|
||
private String getDuplicateIDsString(Collection<Object[]> dupKeysCollection) { | ||
StringBuilder message = new StringBuilder(); | ||
for (Object[] ids: dupKeysCollection) { | ||
message.append(Arrays.toString(ids)).append(","); | ||
} | ||
return message.toString(); | ||
} | ||
|
||
private PrimaryKey getPrimaryKey(ReadState readState) { | ||
PrimaryKey primaryKey = null; | ||
|
||
if (fieldPathNames == null) { | ||
HollowSchema schema = readState.getStateEngine().getSchema(dataTypeName); | ||
if (schema.getSchemaType() != (SchemaType.OBJECT)) | ||
throw new ValidationException("Primary key validation is defined but schema type of "+ dataTypeName+" is not Object. This validation cannot be done."); | ||
HollowObjectSchema oSchema = (HollowObjectSchema) schema; | ||
primaryKey = oSchema.getPrimaryKey(); | ||
} else { | ||
primaryKey = new PrimaryKey(dataTypeName, fieldPathNames); | ||
} | ||
if (primaryKey == null) | ||
throw new ValidationException( | ||
"Primary key validation defined but unable to find primary key for data type " + dataTypeName); | ||
|
||
return primaryKey; | ||
} | ||
|
||
} |
100 changes: 100 additions & 0 deletions
100
hollow/src/test/java/com/netflix/hollow/api/producer/validation/ProducerValidationTests.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
/* | ||
* | ||
* Copyright 2017 Netflix, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
package com.netflix.hollow.api.producer.validation; | ||
|
||
import org.junit.Assert; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import com.netflix.hollow.api.consumer.HollowConsumer; | ||
import com.netflix.hollow.api.consumer.InMemoryBlobStore; | ||
import com.netflix.hollow.api.producer.HollowProducer; | ||
import com.netflix.hollow.api.producer.HollowProducer.Populator; | ||
import com.netflix.hollow.api.producer.HollowProducer.Validator.ValidationException; | ||
import com.netflix.hollow.api.producer.HollowProducer.WriteState; | ||
import com.netflix.hollow.api.producer.fs.HollowInMemoryBlobStager; | ||
import com.netflix.hollow.core.write.objectmapper.HollowPrimaryKey; | ||
|
||
public class ProducerValidationTests { | ||
private InMemoryBlobStore blobStore; | ||
|
||
@Before | ||
public void setUp() { | ||
blobStore = new InMemoryBlobStore(); | ||
} | ||
|
||
@Test | ||
public void duplicateDetectionFailureTest() { | ||
HollowProducer producer = HollowProducer.withPublisher(blobStore) | ||
.withBlobStager(new HollowInMemoryBlobStager()) | ||
.withValidator(new DuplicateDataDetectionValidator("TypeWithPrimaryKey")) | ||
.build(); | ||
|
||
try { | ||
//runCycle(producer, 1); | ||
producer.runCycle(new Populator() { | ||
|
||
public void populate(WriteState newState) throws Exception { | ||
newState.add(new TypeWithPrimaryKey(1, "Brad Pitt", "klsdjfla;sdjkf")); | ||
newState.add(new TypeWithPrimaryKey(1, "Angelina Jolie", "as;dlkfjasd;l")); | ||
newState.add(new TypeWithPrimaryKey(1, "Brad Pitt", "as;dlkfjasd;l")); | ||
} | ||
}); | ||
Assert.fail(); | ||
} catch(ValidationException expected) { | ||
Assert.assertEquals(1, expected.getIndividualFailures().size()); | ||
//System.out.println("Message: "+expected.getIndividualFailures().get(0).getMessage()); | ||
Assert.assertTrue(expected.getIndividualFailures().get(0).getMessage().startsWith("Duplicate keys found for type TypeWithPrimaryKey")); | ||
} | ||
} | ||
|
||
@Test | ||
public void duplicateDetectionSuccessTest() { | ||
HollowProducer producer = HollowProducer.withPublisher(blobStore) | ||
.withBlobStager(new HollowInMemoryBlobStager()) | ||
.withValidator(new DuplicateDataDetectionValidator("TypeWithPrimaryKey")) | ||
.build(); | ||
|
||
//runCycle(producer, 1); | ||
producer.runCycle(new Populator() { | ||
|
||
public void populate(WriteState newState) throws Exception { | ||
newState.add(new TypeWithPrimaryKey(1, "Brad Pitt", "klsdjfla;sdjkf")); | ||
newState.add(new TypeWithPrimaryKey(1, "Angelina Jolie", "as;dlkfjasd;l")); | ||
} | ||
}); | ||
|
||
HollowConsumer consumer = HollowConsumer.withBlobRetriever(blobStore).build(); | ||
consumer.triggerRefresh(); | ||
Assert.assertEquals(2, consumer.getStateEngine().getTypeState("TypeWithPrimaryKey").getPopulatedOrdinals().cardinality()); | ||
} | ||
|
||
|
||
@HollowPrimaryKey(fields={"id","name"}) | ||
static class TypeWithPrimaryKey{ | ||
int id; | ||
String name; | ||
String desc; | ||
|
||
TypeWithPrimaryKey(int id, String name, String desc){ | ||
this.id=id; | ||
this.name=name; | ||
this.desc = desc; | ||
} | ||
} | ||
} |