Skip to content

Commit

Permalink
Merge pull request #74 from Netflix/dupedetect
Browse files Browse the repository at this point in the history
Duplicate detection validator. Based on the primary key definition, it detects whether duplicate values have been added for a particular type. Used for bad data detection and validation.
  • Loading branch information
lkancode authored Aug 4, 2017
2 parents 324ea78 + 8162153 commit ecabde9
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
*
* Copyright 2017 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.netflix.hollow.api.producer.validation;

import java.util.Arrays;
import java.util.Collection;

import com.netflix.hollow.api.producer.HollowProducer.ReadState;
import com.netflix.hollow.api.producer.HollowProducer.Validator;
import com.netflix.hollow.core.index.HollowPrimaryKeyIndex;
import com.netflix.hollow.core.index.key.PrimaryKey;
import com.netflix.hollow.core.schema.HollowObjectSchema;
import com.netflix.hollow.core.schema.HollowSchema;
import com.netflix.hollow.core.schema.HollowSchema.SchemaType;

/**
 * A {@link Validator} that fails a producer cycle when a data type contains
 * more than one record with the same primary key.
 *
 * <p>The primary key is either supplied explicitly as a set of field paths, or
 * taken from the primary key declared on the type's object schema in the read
 * state being validated.
 *
 * @author lkanchanapalli
 */
public class DuplicateDataDetectionValidator implements Validator {
    // Visibility intentionally left package-private, as originally declared.
    String dataTypeName;
    private final String[] fieldPathNames;

    /**
     * Creates a validator that uses the primary key declared on the schema of
     * the given type.
     *
     * @param dataTypeName type for which duplicate data detection is needed.
     */
    public DuplicateDataDetectionValidator(String dataTypeName) {
        this(dataTypeName, null);
    }

    /**
     * Creates a validator that uses an explicitly supplied primary key.
     *
     * @param dataTypeName type for which duplicate data detection is needed.
     * @param fieldPathNames field paths that define the primary key; when
     *        {@code null}, the primary key declared on the schema is used.
     */
    public DuplicateDataDetectionValidator(String dataTypeName, String[] fieldPathNames) {
        this.dataTypeName = dataTypeName;
        // Copy so that later changes to the caller's array cannot alter the key definition.
        this.fieldPathNames = fieldPathNames == null ? null : fieldPathNames.clone();
    }

    /**
     * Builds a primary key index over the read state and fails validation if
     * the index reports any duplicate keys.
     *
     * @param readState the state produced by the current cycle.
     * @throws ValidationException if duplicates exist, or if no usable primary
     *         key can be determined for the configured type.
     * @see com.netflix.hollow.api.producer.HollowProducer.Validator#validate(com.netflix.hollow.api.producer.HollowProducer.ReadState)
     */
    @Override
    public void validate(ReadState readState) {
        PrimaryKey primaryKey = getPrimaryKey(readState);
        HollowPrimaryKeyIndex hollowPrimaryKeyIndex =
                new HollowPrimaryKeyIndex(readState.getStateEngine(), primaryKey);
        Collection<Object[]> duplicateKeys = hollowPrimaryKeyIndex.getDuplicateKeys();

        if (duplicateKeys != null && !duplicateKeys.isEmpty()) {
            String duplicateIds = getDuplicateIDsString(duplicateKeys);
            String errorMsg = String.format(
                    "Duplicate keys found for type %s. Unique key is defined as %s. Duplicate IDs are: %s",
                    dataTypeName, Arrays.toString(primaryKey.getFieldPaths()), duplicateIds);
            throw new ValidationException(errorMsg);
        }
    }

    /**
     * Renders the duplicate keys as a comma-separated list, one bracketed
     * entry per key, without a trailing separator.
     */
    private String getDuplicateIDsString(Collection<Object[]> dupKeysCollection) {
        StringBuilder message = new StringBuilder();
        for (Object[] ids : dupKeysCollection) {
            if (message.length() > 0) {
                message.append(",");
            }
            message.append(Arrays.toString(ids));
        }
        return message.toString();
    }

    /**
     * Resolves the primary key to validate against: the explicit field paths
     * when they were supplied, otherwise the key declared on the type's
     * object schema.
     *
     * @throws ValidationException if the type has no schema in the read state,
     *         is not an object type, or declares no primary key.
     */
    private PrimaryKey getPrimaryKey(ReadState readState) {
        if (fieldPathNames != null) {
            return new PrimaryKey(dataTypeName, fieldPathNames);
        }

        HollowSchema schema = readState.getStateEngine().getSchema(dataTypeName);
        // Report an unknown type as a validation failure rather than an NPE.
        if (schema == null) {
            throw new ValidationException(
                    "Primary key validation defined but unable to find schema for data type " + dataTypeName);
        }
        if (schema.getSchemaType() != SchemaType.OBJECT) {
            throw new ValidationException("Primary key validation is defined but schema type of "+ dataTypeName+" is not Object. This validation cannot be done.");
        }

        PrimaryKey primaryKey = ((HollowObjectSchema) schema).getPrimaryKey();
        if (primaryKey == null) {
            throw new ValidationException(
                    "Primary key validation defined but unable to find primary key for data type " + dataTypeName);
        }
        return primaryKey;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
*
* Copyright 2017 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.netflix.hollow.api.producer.validation;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import com.netflix.hollow.api.consumer.HollowConsumer;
import com.netflix.hollow.api.consumer.InMemoryBlobStore;
import com.netflix.hollow.api.producer.HollowProducer;
import com.netflix.hollow.api.producer.HollowProducer.Populator;
import com.netflix.hollow.api.producer.HollowProducer.Validator.ValidationException;
import com.netflix.hollow.api.producer.HollowProducer.WriteState;
import com.netflix.hollow.api.producer.fs.HollowInMemoryBlobStager;
import com.netflix.hollow.core.write.objectmapper.HollowPrimaryKey;

public class ProducerValidationTests {
private InMemoryBlobStore blobStore;

@Before
public void setUp() {
blobStore = new InMemoryBlobStore();
}

@Test
public void duplicateDetectionFailureTest() {
HollowProducer producer = HollowProducer.withPublisher(blobStore)
.withBlobStager(new HollowInMemoryBlobStager())
.withValidator(new DuplicateDataDetectionValidator("TypeWithPrimaryKey"))
.build();

try {
//runCycle(producer, 1);
producer.runCycle(new Populator() {

public void populate(WriteState newState) throws Exception {
newState.add(new TypeWithPrimaryKey(1, "Brad Pitt", "klsdjfla;sdjkf"));
newState.add(new TypeWithPrimaryKey(1, "Angelina Jolie", "as;dlkfjasd;l"));
newState.add(new TypeWithPrimaryKey(1, "Brad Pitt", "as;dlkfjasd;l"));
}
});
Assert.fail();
} catch(ValidationException expected) {
Assert.assertEquals(1, expected.getIndividualFailures().size());
//System.out.println("Message: "+expected.getIndividualFailures().get(0).getMessage());
Assert.assertTrue(expected.getIndividualFailures().get(0).getMessage().startsWith("Duplicate keys found for type TypeWithPrimaryKey"));
}
}

@Test
public void duplicateDetectionSuccessTest() {
HollowProducer producer = HollowProducer.withPublisher(blobStore)
.withBlobStager(new HollowInMemoryBlobStager())
.withValidator(new DuplicateDataDetectionValidator("TypeWithPrimaryKey"))
.build();

//runCycle(producer, 1);
producer.runCycle(new Populator() {

public void populate(WriteState newState) throws Exception {
newState.add(new TypeWithPrimaryKey(1, "Brad Pitt", "klsdjfla;sdjkf"));
newState.add(new TypeWithPrimaryKey(1, "Angelina Jolie", "as;dlkfjasd;l"));
}
});

HollowConsumer consumer = HollowConsumer.withBlobRetriever(blobStore).build();
consumer.triggerRefresh();
Assert.assertEquals(2, consumer.getStateEngine().getTypeState("TypeWithPrimaryKey").getPopulatedOrdinals().cardinality());
}


@HollowPrimaryKey(fields={"id","name"})
static class TypeWithPrimaryKey{
int id;
String name;
String desc;

TypeWithPrimaryKey(int id, String name, String desc){
this.id=id;
this.name=name;
this.desc = desc;
}
}
}

0 comments on commit ecabde9

Please sign in to comment.