Skip to content

Commit

Permalink
OPENNLP-1588 Clean out deprecated code marked for removal
Browse files Browse the repository at this point in the history
- removes deprecated getters in several records
- removes a long-deprecated constructor of TokenizerME
- removes deprecated field in DefaultNameContextGenerator and adapts tests accordingly
- marks a constructor of ADNameSampleStream as "for removal" to indicate actual removal is more likely in upcoming releases
- marks deprecated 'NameFinderEventStream#generateOutcomes(..)' as "for removal" to indicate actual removal is more likely in upcoming releases
- clears unused code from NameFinderME
- fixes resource leak in TokenNameFinderFactory
  • Loading branch information
mawiesne committed Jul 4, 2024
1 parent 7cf55ee commit 4188ab0
Show file tree
Hide file tree
Showing 14 changed files with 15 additions and 171 deletions.
10 changes: 0 additions & 10 deletions opennlp-dl/src/main/java/opennlp/dl/SpanEnd.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,6 @@

public record SpanEnd(int index, int characterEnd) {

/**
 * @return The value of the {@code index} record component.
 * @deprecated Use the record accessor {@link #index()} instead.
 */
@Deprecated(forRemoval = true)
public int getIndex() {
  return this.index;
}

/**
 * @return The value of the {@code characterEnd} record component.
 * @deprecated Use the record accessor {@link #characterEnd()} instead.
 */
@Deprecated(forRemoval = true)
public int getCharacterEnd() {
  return this.characterEnd;
}

@Override
public String toString() {
return "index: " + index + "; character end: " + characterEnd;
Expand Down
19 changes: 0 additions & 19 deletions opennlp-dl/src/main/java/opennlp/dl/Tokens.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,4 @@
*/
public record Tokens(String[] tokens, long[] ids, long[] mask, long[] types) {

  /**
   * @return The {@code tokens} array held by this record; the array itself is returned, not a copy.
   * @deprecated Use the record accessor {@link #tokens()} instead.
   */
  @Deprecated(forRemoval = true)
  public String[] getTokens() {
    return this.tokens;
  }

  /**
   * @return The {@code ids} array held by this record; the array itself is returned, not a copy.
   * @deprecated Use the record accessor {@link #ids()} instead.
   */
  @Deprecated(forRemoval = true)
  public long[] getIds() {
    return this.ids;
  }

  /**
   * @return The {@code mask} array held by this record; the array itself is returned, not a copy.
   * @deprecated Use the record accessor {@link #mask()} instead.
   */
  @Deprecated(forRemoval = true)
  public long[] getMask() {
    return this.mask;
  }

  /**
   * @return The {@code types} array held by this record; the array itself is returned, not a copy.
   * @deprecated Use the record accessor {@link #types()} instead.
   */
  @Deprecated(forRemoval = true)
  public long[] getTypes() {
    return this.types;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,4 @@
*/
public record Entry(StringList tokens, Attributes attributes) {

  /**
   * @return The {@code tokens} record component.
   * @deprecated Use the record accessor {@link #tokens()} instead.
   */
  @Deprecated(forRemoval = true)
  public StringList getTokens() {
    return this.tokens;
  }

  /**
   * @return The {@code attributes} record component.
   * @deprecated Use the record accessor {@link #attributes()} instead.
   */
  @Deprecated(forRemoval = true)
  public Attributes getAttributes() {
    return this.attributes;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ public ADNameSampleStream(ObjectStream<String> lineStream, boolean splitHyphenat
* @param splitHyphenatedTokens If {@code true} hyphenated tokens will be separated:
* "carros-monstro" &gt; "carros" "-" "monstro".
*/
@Deprecated
@Deprecated(forRemoval = true)
public ADNameSampleStream(InputStreamFactory in, String charsetName,
boolean splitHyphenatedTokens) throws IOException {
this(new PlainTextByLineStream(in, charsetName), splitHyphenatedTokens);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package opennlp.tools.langdetect;

import java.io.Serial;
import java.io.Serializable;
import java.util.Objects;

Expand All @@ -25,23 +26,14 @@
*/
public record LanguageSample(Language language, CharSequence context) implements Serializable {

private static final long serialVersionUID = -2222893493240468729L;
@Serial
private static final long serialVersionUID = -4791295976215940258L;

public LanguageSample(Language language, CharSequence context) {
this.language = Objects.requireNonNull(language, "language must not be null");
this.context = Objects.requireNonNull(context, "context must not be null");
}

@Deprecated(forRemoval = true)
public Language getLanguage() {
return language;
}

@Deprecated(forRemoval = true)
public CharSequence getContext() {
return context;
}

@Override
public String toString() {
return language.getLang() + '\t' + context;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,4 @@
*/
public record ProbingLanguageDetectionResult(Language[] languages, int length) {

  /**
   * @return The {@code languages} array held by this record; the array itself is returned, not a copy.
   * @deprecated Use the record accessor {@link #languages()} instead.
   */
  @Deprecated(forRemoval = true)
  public Language[] getLanguages() {
    return this.languages;
  }

  /**
   * @return The {@code length} record component.
   * @deprecated Use the record accessor {@link #length()} instead.
   */
  @Deprecated(forRemoval = true)
  public int getLength() {
    return this.length;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,10 @@

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.BigramNameFeatureGenerator;
import opennlp.tools.util.featuregen.CachedFeatureGenerator;
import opennlp.tools.util.featuregen.FeatureGeneratorUtil;
import opennlp.tools.util.featuregen.OutcomePriorFeatureGenerator;
import opennlp.tools.util.featuregen.PreviousMapFeatureGenerator;
import opennlp.tools.util.featuregen.TokenClassFeatureGenerator;
import opennlp.tools.util.featuregen.TokenFeatureGenerator;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;

/**
* A {@link NameContextGenerator} implementation for determining contextual features
Expand All @@ -38,31 +32,15 @@ public class DefaultNameContextGenerator implements NameContextGenerator {

protected AdaptiveFeatureGenerator[] featureGenerators;

@Deprecated
private static final AdaptiveFeatureGenerator WINDOW_FEATURES = new CachedFeatureGenerator(
new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2),
new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2),
new OutcomePriorFeatureGenerator(),
new PreviousMapFeatureGenerator(),
new BigramNameFeatureGenerator());

/**
* Creates a name context generator with the specified
* {@link AdaptiveFeatureGenerator feature generators}.
*
* @param featureGenerators One or more {@link AdaptiveFeatureGenerator feature generators}.
* If none are provided, a default config ({@link #WINDOW_FEATURES})
* will be used.
*/
public DefaultNameContextGenerator(AdaptiveFeatureGenerator... featureGenerators) {

if (featureGenerators != null) {
this.featureGenerators = featureGenerators;
}
else { // use defaults
this.featureGenerators =
new AdaptiveFeatureGenerator[]{WINDOW_FEATURES, new PreviousMapFeatureGenerator()};
}
this.featureGenerators = Objects.requireNonNull(
featureGenerators, "Please specify at least one featureGenerator");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public NameFinderEventStream(ObjectStream<NameSample> dataStream, String type,
*
* @deprecated use the {@link BioCodec} implementation of the SequenceValidator instead!
*/
@Deprecated
@Deprecated(forRemoval = true)
public static String[] generateOutcomes(Span[] names, String type, int length) {
String[] outcomes = new String[length];
Arrays.fill(outcomes, NameFinderME.OTHER);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

package opennlp.tools.namefind;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
Expand All @@ -44,9 +43,7 @@
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.AdditionalContextFeatureGenerator;
import opennlp.tools.util.featuregen.GeneratorFactory;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;

/**
Expand Down Expand Up @@ -92,26 +89,6 @@ public NameFinderME(TokenNameFinderModel model) {
new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}

/**
 * Builds an {@link AdaptiveFeatureGenerator} from a serialized generator descriptor.
 *
 * @param generatorDescriptor The raw descriptor bytes handed to {@code GeneratorFactory.create(..)};
 *                            if {@code null}, no generator is built.
 * @param resources A map consulted by key whenever the factory requests a resource.
 *                  May be {@code null}, in which case every lookup yields {@code null}.
 * @return The generator produced by {@code GeneratorFactory.create(..)},
 *         or {@code null} if {@code generatorDescriptor} is {@code null}.
 * @throws IOException Declared because the factory call may raise it.
 */
private static AdaptiveFeatureGenerator createFeatureGenerator(
    byte[] generatorDescriptor, final Map<String, Object> resources)
    throws IOException {
  // Nothing to build without a descriptor.
  if (generatorDescriptor == null) {
    return null;
  }

  final ByteArrayInputStream descriptorStream = new ByteArrayInputStream(generatorDescriptor);
  return GeneratorFactory.create(descriptorStream,
      resourceKey -> resources == null ? null : resources.get(resourceKey));
}

@Override
public Span[] find(String[] tokens) {
return find(tokens, EMPTY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,8 @@ public AdaptiveFeatureGenerator createFeatureGenerators() {
featureGeneratorBytes = loadDefaultFeatureGeneratorBytes();
}

InputStream descriptorIn = new ByteArrayInputStream(featureGeneratorBytes);

AdaptiveFeatureGenerator generator;
try {
generator = GeneratorFactory.create(descriptorIn, key -> {
try (InputStream descriptorIn = new ByteArrayInputStream(featureGeneratorBytes)) {
return GeneratorFactory.create(descriptorIn, key -> {
if (artifactProvider != null) {
return artifactProvider.getArtifact(key);
}
Expand All @@ -273,11 +270,10 @@ public AdaptiveFeatureGenerator createFeatureGenerators() {
// throwing a Runtime Exception is reasonable

throw new FeatureGeneratorCreationError(e);
} catch (IOException e) {
}
catch (IOException e) {
throw new IllegalStateException("Reading from mem cannot result in an I/O error", e);
}

return generator;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,16 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.tokenize.lang.Factory;
import opennlp.tools.util.DownloadUtil;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
Expand Down Expand Up @@ -143,33 +140,6 @@ public TokenizerME(TokenizerModel model) {
tokProbs = new ArrayList<>(50);
}

/**
* @deprecated use {@link TokenizerFactory} to extend the Tokenizer
* functionality
*/
@Deprecated
public TokenizerME(TokenizerModel model, Factory factory) {
String languageCode = model.getLanguage();

this.alphanumeric = factory.getAlphanumeric(languageCode);
this.cg = factory.createTokenContextGenerator(languageCode,
getAbbreviations(model.getAbbreviations()));

this.model = model.getMaxentModel();
useAlphaNumericOptimization = model.useAlphaNumericOptimization();

abbDict = model.getAbbreviations();
newTokens = new ArrayList<>();
tokProbs = new ArrayList<>(50);
}

/**
 * Converts an abbreviations dictionary into a set of strings.
 *
 * @param abbreviations The dictionary to convert; may be {@code null}.
 * @return The result of {@code abbreviations.asStringSet()}, or an empty set
 *         if {@code abbreviations} is {@code null}.
 */
private static Set<String> getAbbreviations(Dictionary abbreviations) {
  return abbreviations != null ? abbreviations.asStringSet() : Collections.emptySet();
}

/**
* @return the probabilities associated with the most recent calls to
* {@link TokenizerME#tokenize(String)} or {@link TokenizerME#tokenizePos(String)}.
Expand Down
15 changes: 0 additions & 15 deletions opennlp-tools/src/main/java/opennlp/tools/util/TokenTag.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,6 @@ public TokenTag(String token, String tag, String[] additionalData) {
}
}

/**
 * @return The value of the {@code token} field.
 * @deprecated Marked for removal.
 */
@Deprecated(forRemoval = true)
public String getToken() {
  return this.token;
}

/**
 * @return The value of the {@code tag} field.
 * @deprecated Marked for removal.
 */
@Deprecated(forRemoval = true)
public String getTag() {
  return this.tag;
}

/**
 * @return The {@code additionalData} array; the array itself is returned, not a copy.
 * @deprecated Marked for removal.
 */
@Deprecated(forRemoval = true)
public String[] getAdditionalData() {
  return this.additionalData;
}

public static String[] extractTokens(TokenTag[] tuples) {
String[] tokens = new String[tuples.length];
for (int i = 0; i < tuples.length; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,14 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import opennlp.tools.namefind.DefaultNameContextGenerator;
import opennlp.tools.namefind.NameContextGenerator;
import opennlp.tools.namefind.NameFinderEventStream;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;

public class TwoPassDataIndexerTest {

Expand Down Expand Up @@ -74,8 +73,7 @@ void testIndexWithNewline() throws IOException {

String[] sentence = "He belongs to Apache \n Software Foundation .".split(" ");

NameContextGenerator CG = new DefaultNameContextGenerator(
(AdaptiveFeatureGenerator[]) null);
NameContextGenerator CG = new TokenNameFinderFactory().createContextGenerator();

NameSample nameSample = new NameSample(sentence,
new Span[] {new Span(3, 7)}, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.Span;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;

/**
* This is the test class for {@link NameFinderEventStream}.
Expand All @@ -37,18 +36,14 @@ public class NameFinderEventStreamTest {
"the", "hint", "and", "enjoyed", "a", "delicious", "traditional", "meal",
"."};

private static final NameContextGenerator CG = new DefaultNameContextGenerator(
(AdaptiveFeatureGenerator[]) null);
private static final NameContextGenerator CG = new TokenNameFinderFactory().createContextGenerator();

/**
* Tests the correctly generated outcomes for a test sentence.
*/
@Test
void testOutcomesForSingleTypeSentence() throws IOException {

NameContextGenerator CG = new DefaultNameContextGenerator(
(AdaptiveFeatureGenerator[]) null);

NameSample nameSample = new NameSample(SENTENCE,
new Span[] {new Span(0, 2, "person")}, false);

Expand Down

0 comments on commit 4188ab0

Please sign in to comment.