Skip to content

Commit

Permalink
OPENNLP-1589 - Use Arrays.equals(...)
Browse files Browse the repository at this point in the history
  • Loading branch information
rzo1 committed Jul 4, 2024
1 parent 60e080c commit 43e6d39
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,13 @@ public NameContextGenerator createContextGenerator() {
AdaptiveFeatureGenerator featureGenerator = createFeatureGenerators();

if (featureGenerator == null) {
featureGenerator = new CachedFeatureGenerator(
featureGenerator = new CachedFeatureGenerator(new AggregatedFeatureGenerator(
new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2),
new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2),
new OutcomePriorFeatureGenerator(),
new PreviousMapFeatureGenerator(),
new BigramNameFeatureGenerator(),
new SentenceFeatureGenerator(true, false));
new SentenceFeatureGenerator(true, false)));
}

return new DefaultNameContextGenerator(featureGenerator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package opennlp.tools.util.featuregen;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import opennlp.tools.util.Cache;
Expand All @@ -37,15 +38,13 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator {
private long numberOfCacheHits;
private long numberOfCacheMisses;

@Deprecated
public CachedFeatureGenerator(AdaptiveFeatureGenerator... generators) {
this.generator = new AggregatedFeatureGenerator(generators);
contextsCache = new Cache<>(100);
public CachedFeatureGenerator(AdaptiveFeatureGenerator generator, int cacheSize) {
this.generator = generator;
contextsCache = new Cache<>(cacheSize);
}

public CachedFeatureGenerator(AdaptiveFeatureGenerator generator) {
this.generator = generator;
contextsCache = new Cache<>(100);
this(generator, 100);
}

@Override
Expand All @@ -54,7 +53,7 @@ public void createFeatures(List<String> features, String[] tokens, int index,

List<String> cacheFeatures;

if (tokens == prevTokens) {
if (Arrays.equals(prevTokens, tokens)) {
cacheFeatures = contextsCache.get(index);

if (cacheFeatures != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,31 @@ void setUp() {
features = new ArrayList<>();
}

/**
 * Tests the cache hit and miss counters for a sentence that contains a
 * line-break token, queried twice at each of two token indexes.
 * The same array instance is passed on every call.
 */
@Test
void testCachingOfRealWorldSentence() {
CachedFeatureGenerator generator = new CachedFeatureGenerator(identityGenerator);
// splitting on single spaces leaves "\n" as a token of its own
final String[] sentence = "He belongs to Apache \n Software Foundation .".split(" ");
int testIndex = 0;

// first call for testIndex: expected to be a miss, after which the features are cached
generator.createFeatures(features, sentence, testIndex, null);
Assertions.assertEquals(1, generator.getNumberOfCacheMisses());
Assertions.assertEquals(0, generator.getNumberOfCacheHits());

// same sentence, same index: expected to be served from the cache
generator.createFeatures(features, sentence, testIndex, null);
Assertions.assertEquals(1, generator.getNumberOfCacheMisses());
Assertions.assertEquals(1, generator.getNumberOfCacheHits());

// same sentence, next index: nothing cached for this index yet, so one more miss
generator.createFeatures(features, sentence, testIndex + 1, null);
Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
Assertions.assertEquals(1, generator.getNumberOfCacheHits());

// repeating the second index: expected to be a hit, leaving the miss count unchanged
generator.createFeatures(features, sentence, testIndex + 1, null);
Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
Assertions.assertEquals(2, generator.getNumberOfCacheHits());

}

/**
* Tests if cache works for one sentence and two different token indexes.
*/
Expand Down Expand Up @@ -85,7 +110,7 @@ void testCachingOfSentence() {

int testIndex2 = testIndex + 1;

generator.createFeatures(features, Arrays.copyOf(testSentence1, testSentence1.length), testIndex2, null);
generator.createFeatures(features, Arrays.copyOf(testSentence1, testSentence1.length), testIndex2, null);

Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
Assertions.assertEquals(1, generator.getNumberOfCacheHits());
Expand Down Expand Up @@ -115,7 +140,7 @@ void testCacheClearAfterSentenceChange() {
features.clear();

// use another sentence but same index
generator.createFeatures(features, testSentence2, testIndex, null);
generator.createFeatures(features, Arrays.copyOf(testSentence2, testSentence2.length), testIndex, null);

Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
Assertions.assertEquals(0, generator.getNumberOfCacheHits());
Expand All @@ -127,10 +152,7 @@ void testCacheClearAfterSentenceChange() {

// check if features are really cached
final String expectedToken = testSentence2[testIndex];

testSentence2[testIndex] = null;

generator.createFeatures(features, testSentence2, testIndex, null);
generator.createFeatures(features, Arrays.copyOf(testSentence2, testSentence2.length), testIndex, null);

Assertions.assertTrue(features.contains(expectedToken));
Assertions.assertEquals(1, features.size());
Expand Down

0 comments on commit 43e6d39

Please sign in to comment.