Skip to content

Commit

Permalink
Resolves #175.
Browse files Browse the repository at this point in the history
  • Loading branch information
khituras committed Jan 4, 2024
1 parent cb92ede commit c302155
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 6 deletions.
2 changes: 1 addition & 1 deletion jcore-elasticsearch-consumer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
perfectly valid ElasticSearch default JSON which does not require the plugin. Then, however, the preanalyzed
data format cannot be used because this is the format that required the ElasticSearch plugin.
</description>

<version>2.6.3-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>de.julielab</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
indexName = (String) getContext().getConfigParameterValue(PARAM_INDEX_NAME);
type = (String) getContext().getConfigParameterValue(PARAM_TYPE);
batchSize = Optional.ofNullable((Integer) getContext().getConfigParameterValue(PARAM_BATCH_SIZE)).orElse(50);
bulkCommand = new ArrayList<>(4000);
bulkCommand = new ArrayList<>(batchSize*2);
deleteDocsBeforeIndexing = (boolean) Optional.ofNullable(getContext().getConfigParameterValue(PARAM_DELETE_DOCS_BEFORE_INDEXING)).orElse(false);
docIdField = (String) getContext().getConfigParameterValue(PARAM_DOC_ID_FIELD);

Expand Down Expand Up @@ -265,12 +265,13 @@ private void postBulkIndexAction() throws AnalysisEngineProcessException {
if (subList.isEmpty())
continue;
lastIndex += subList.size();
log.debug("Sending {} documents to index {}.", subList.size(), indexName);
log.debug("Sending {} documents to index {}.", subList.size()/2, indexName);
long time = System.currentTimeMillis();
// The bulk format requires us to have a newline also after the
// last
// line; it will be ignored otherwise!
String bulkCommandString = StringUtils.join(subList, "\n") + "\n";
log.trace(bulkCommandString);
StringEntity bulkIndexEntity = new StringEntity(bulkCommandString, "UTF-8");
indexPost.setEntity(bulkIndexEntity);
HttpResponse response = httpclient.execute(indexPost);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,24 @@

import org.tartarus.snowball.SnowballProgram;

import java.lang.reflect.InvocationTargetException;
import java.util.List;

public class SnowballFilter extends AbstractFilter {

private SnowballProgram stemmer;

/**
 * Creates a filter with the default stemmer by passing the class name
 * {@code org.tartarus.snowball.ext.EnglishStemmer} to {@link #SnowballFilter(String)}.
 */
public SnowballFilter() {
this("org.tartarus.snowball.ext.EnglishStemmer");
}

/**
 * Creates a filter and instantiates its stemmer reflectively; the loaded class is
 * narrowed with {@code asSubclass(SnowballProgram.class)}.
 * <p>
 * NOTE(review): this block looks like a diff rendering whose +/- markers were lost, so
 * pre-change and post-change lines appear side by side. As shown it is not the final
 * code -- confirm against the repository.
 *
 * @param snowballProgram class name handed to {@code Class.forName}
 * @throws RuntimeException wrapping the checked reflection exceptions listed in the catch clause
 */
public SnowballFilter(String snowballProgram) {
super();
Class<? extends SnowballProgram> stemClass;
try {
// NOTE(review): the next two statements and the first catch header appear to be the
// removed (pre-commit) lines: hard-coded English stemmer and Class.newInstance().
stemClass = Class.forName("org.tartarus.snowball.ext.EnglishStemmer").asSubclass(SnowballProgram.class);
stemmer = stemClass.newInstance();
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
// NOTE(review): the next two statements and the second catch header appear to be the
// added (post-commit) lines: load the class named by snowballProgram and invoke its
// no-argument constructor.
stemClass = Class.forName(snowballProgram).asSubclass(SnowballProgram.class);
stemmer = stemClass.getDeclaredConstructor().newInstance();
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) {
// Rethrow as unchecked, keeping the original exception as the cause.
throw new RuntimeException(e);
}
}
Expand Down

0 comments on commit c302155

Please sign in to comment.