Skip to content

Commit

Permalink
OPENNLP-1589 - Implements a more aggressive caching strategy based on…
Browse files Browse the repository at this point in the history
… a dual caching strategy; relies on cache evication as implemented by the Cache class of OpenNLP.

 - Use Arrays.equals(...)
 - Demonstrate that caching based on identity is broken.
  • Loading branch information
rzo1 committed Jul 4, 2024
1 parent 4188ab0 commit 9bac8a7
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,13 @@ public NameContextGenerator createContextGenerator() {
AdaptiveFeatureGenerator featureGenerator = createFeatureGenerators();

if (featureGenerator == null) {
featureGenerator = new CachedFeatureGenerator(
featureGenerator = new CachedFeatureGenerator(new AggregatedFeatureGenerator(
new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2),
new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2),
new OutcomePriorFeatureGenerator(),
new PreviousMapFeatureGenerator(),
new BigramNameFeatureGenerator(),
new SentenceFeatureGenerator(true, false));
new SentenceFeatureGenerator(true, false)));
}

return new DefaultNameContextGenerator(featureGenerator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package opennlp.tools.util.featuregen;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import opennlp.tools.util.Cache;
Expand All @@ -30,52 +31,42 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator {

private final AdaptiveFeatureGenerator generator;

private String[] prevTokens;

private final Cache<Integer, List<String>> contextsCache;
private final Cache<Integer, Cache<Integer, List<String>>> contextCaches;
private final int cacheSize;

private long numberOfCacheHits;
private long numberOfCacheMisses;

@Deprecated
public CachedFeatureGenerator(AdaptiveFeatureGenerator... generators) {
this.generator = new AggregatedFeatureGenerator(generators);
contextsCache = new Cache<>(100);
public CachedFeatureGenerator(AdaptiveFeatureGenerator generator, int cacheSize) {
this.generator = generator;
this.contextCaches = new Cache<>(cacheSize);
this.cacheSize = cacheSize;
}

public CachedFeatureGenerator(AdaptiveFeatureGenerator generator) {
this.generator = generator;
contextsCache = new Cache<>(100);
this(generator, 100);
}

@Override
public void createFeatures(List<String> features, String[] tokens, int index,
String[] previousOutcomes) {
String[] previousOutcomes) {

List<String> cacheFeatures;
final int tokenHash = Arrays.hashCode(tokens);

if (tokens == prevTokens) {
cacheFeatures = contextsCache.get(index);

if (cacheFeatures != null) {
numberOfCacheHits++;
features.addAll(cacheFeatures);
return;
}
final Cache<Integer, List<String>> contextCache = contextCaches.computeIfAbsent(tokenHash,
k -> new Cache<>(cacheSize));
List<String> cacheFeatures = contextCache.get(index);

if (cacheFeatures != null) {
numberOfCacheHits++;
features.addAll(cacheFeatures);
} else {
contextsCache.clear();
prevTokens = tokens;
numberOfCacheMisses++;
cacheFeatures = new ArrayList<>();
generator.createFeatures(cacheFeatures, tokens, index, previousOutcomes);
contextCache.put(index, cacheFeatures);
features.addAll(cacheFeatures);
}

cacheFeatures = new ArrayList<>();

numberOfCacheMisses++;

generator.createFeatures(cacheFeatures, tokens, index, previousOutcomes);

contextsCache.put(index, cacheFeatures);
features.addAll(cacheFeatures);
}

@Override
Expand All @@ -102,6 +93,12 @@ public long getNumberOfCacheMisses() {
return numberOfCacheMisses;
}

public void clearCache() {
this.contextCaches.clear();
this.numberOfCacheMisses = 0;
this.numberOfCacheHits = 0;
}

@Override
public String toString() {
return super.toString() + ": hits=" + numberOfCacheHits
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package opennlp.tools.util.featuregen;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.junit.jupiter.api.Assertions;
Expand Down Expand Up @@ -47,6 +48,40 @@ void setUp() {
features = new ArrayList<>();
}

@Test
void testCachingOfRealWorldSentence() {
CachedFeatureGenerator generator = new CachedFeatureGenerator(identityGenerator);
final String[] sentence = "He belongs to Apache \n Software Foundation .".split(" ");
int testIndex = 0;

// after this call features are cached for testIndex
generator.createFeatures(features, sentence, testIndex, null);
Assertions.assertEquals(1, generator.getNumberOfCacheMisses());
Assertions.assertEquals(0, generator.getNumberOfCacheHits());

generator.createFeatures(features, sentence, testIndex, null);
Assertions.assertEquals(1, generator.getNumberOfCacheMisses());
Assertions.assertEquals(1, generator.getNumberOfCacheHits());

generator.createFeatures(features, sentence, testIndex + 1, null);
Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
Assertions.assertEquals(1, generator.getNumberOfCacheHits());

generator.createFeatures(features, sentence, testIndex + 1, null);
Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
Assertions.assertEquals(2, generator.getNumberOfCacheHits());

generator.clearCache();

Assertions.assertEquals(0, generator.getNumberOfCacheMisses());
Assertions.assertEquals(0, generator.getNumberOfCacheHits());

generator.createFeatures(features, sentence, testIndex + 1, null);
Assertions.assertEquals(1, generator.getNumberOfCacheMisses());
Assertions.assertEquals(0, generator.getNumberOfCacheHits());

}

/**
* Tests if cache works for one sentence and two different token indexes.
*/
Expand All @@ -70,9 +105,7 @@ void testCachingOfSentence() {

final String expectedToken = testSentence1[testIndex];

testSentence1[testIndex] = null;

generator.createFeatures(features, testSentence1, testIndex, null);
generator.createFeatures(features, Arrays.copyOf(testSentence1, testSentence1.length), testIndex, null);

Assertions.assertEquals(1, generator.getNumberOfCacheMisses());
Assertions.assertEquals(1, generator.getNumberOfCacheHits());
Expand All @@ -86,7 +119,7 @@ void testCachingOfSentence() {

int testIndex2 = testIndex + 1;

generator.createFeatures(features, testSentence1, testIndex2, null);
generator.createFeatures(features, Arrays.copyOf(testSentence1, testSentence1.length), testIndex2, null);

Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
Assertions.assertEquals(1, generator.getNumberOfCacheHits());
Expand Down Expand Up @@ -116,7 +149,7 @@ void testCacheClearAfterSentenceChange() {
features.clear();

// use another sentence but same index
generator.createFeatures(features, testSentence2, testIndex, null);
generator.createFeatures(features, Arrays.copyOf(testSentence2, testSentence2.length), testIndex, null);

Assertions.assertEquals(2, generator.getNumberOfCacheMisses());
Assertions.assertEquals(0, generator.getNumberOfCacheHits());
Expand All @@ -128,10 +161,7 @@ void testCacheClearAfterSentenceChange() {

// check if features are really cached
final String expectedToken = testSentence2[testIndex];

testSentence2[testIndex] = null;

generator.createFeatures(features, testSentence2, testIndex, null);
generator.createFeatures(features, Arrays.copyOf(testSentence2, testSentence2.length), testIndex, null);

Assertions.assertTrue(features.contains(expectedToken));
Assertions.assertEquals(1, features.size());
Expand Down

0 comments on commit 9bac8a7

Please sign in to comment.