Skip to content
This repository has been archived by the owner on May 27, 2020. It is now read-only.

Feature/build custom analyzer #328

Open
wants to merge 40 commits into
base: branch-3.0.13
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
e42179e
Added tokenizers
Apr 24, 2017
64aee42
Add lowercase, edgeNGram and thai tokenizers
jpgilaberte Apr 26, 2017
1eec1d5
Reformat code
jpgilaberte Apr 26, 2017
9966fe0
Add tokenizers in builder module
jpgilaberte Apr 28, 2017
491987b
Scala refactor in tokenizer feature
jpgilaberte May 16, 2017
33b3011
Add license
jpgilaberte May 16, 2017
e999d1f
Add license in scala files
jpgilaberte May 16, 2017
567c0df
Add license in test files
jpgilaberte May 16, 2017
563c76c
Add license in custom analyzer
jpgilaberte May 16, 2017
049d826
Refactor tokenizers
jpgilaberte Jun 2, 2017
048a1df
Add charFilters
jpgilaberte Jun 2, 2017
1645cf0
Add tokenFilter
jpgilaberte Jun 2, 2017
5ec11fd
Add builder objects
jpgilaberte Jun 2, 2017
759c248
Add plugin Test
jpgilaberte Jun 2, 2017
c98e3fc
Add testAt CustomAnalizer
jpgilaberte Jun 2, 2017
3461943
Add JavaDoc in builder
jpgilaberte Jun 7, 2017
598f314
Add ScalaDoc in plugin
jpgilaberte Jun 8, 2017
a1d5f0f
Add TokenFilter documentation
jpgilaberte Jun 12, 2017
3532cab
Fix RST format
jpgilaberte Jun 12, 2017
af386ef
Fix RST format
jpgilaberte Jun 12, 2017
c63b553
Fix RST format
jpgilaberte Jun 12, 2017
b16b9fc
Fix package name format
jpgilaberte Jun 12, 2017
a4cd8f3
Fix package name format
jpgilaberte Jun 12, 2017
3822c65
Fix mandatory column size
jpgilaberte Jun 12, 2017
5ed61a8
Fix mandatory column size
jpgilaberte Jun 12, 2017
cfa3a20
Add more TokenFilters
jpgilaberte Jun 13, 2017
b45479e
Add new TokenFilter documentation
jpgilaberte Jun 13, 2017
f80c83f
Fix rst format
jpgilaberte Jun 13, 2017
1c55d09
Fix rst format
jpgilaberte Jun 13, 2017
29d2fa4
Fix persian charfilter
jpgilaberte Jun 13, 2017
cf88e27
Fix documentation
jpgilaberte Jun 14, 2017
a2f7085
Add char filter test
jpgilaberte Jun 14, 2017
956de0a
Add token filter test
jpgilaberte Jun 14, 2017
4551f00
Add token filters in builder
jpgilaberte Jun 14, 2017
fc10a93
Add field in HtmlStripCharFilter
jpgilaberte Jun 14, 2017
e2dc196
Refactor test package
jpgilaberte Jun 14, 2017
da3a8dd
Fix documentation
jpgilaberte Jun 15, 2017
d01f7ab
Add token filter test
jpgilaberte Jun 15, 2017
b7a5770
Refactor CustomAnalyzerIT
jpgilaberte Jun 15, 2017
270f62b
Add CustomAnalyzerIT removed
jpgilaberte Jun 15, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add testAt CustomAnalizer
jpgilaberte committed Jun 2, 2017
commit c98e3fc5e83fc0d21029da4bc28fd9ce959cfdc8
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.testsAT.schema.analysis.tokenizer;

import com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenizer.NGramTokenizer;
import com.stratio.cassandra.lucene.testsAT.BaseIT;
import com.stratio.cassandra.lucene.testsAT.util.CassandraUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import static com.stratio.cassandra.lucene.builder.Builder.*;
import static com.stratio.cassandra.lucene.builder.Builder.match;

/**
 * Acceptance test checking that a custom analyzer built around an {@link NGramTokenizer} can be
 * declared in an index schema and then used to resolve {@code match} searches.
 */
@RunWith(JUnit4.class)
public class CustomAnalyzerIT extends BaseIT {

    /** Test environment, kept in a static field so that {@link #after()} can clean it up. */
    private static CassandraUtils utils;

    /** Intentionally empty: the single test builds its own environment. */
    @BeforeClass
    public static void before() {
    }

    /**
     * Hands the test environment (possibly {@code null} if the test never built one) to
     * {@code CassandraUtils.dropKeyspaceIfNotNull} for clean up.
     */
    @AfterClass
    public static void after() {
        CassandraUtils.dropKeyspaceIfNotNull(utils);
    }

    /**
     * Indexes the text {@code "aabb"} with a custom analyzer using {@code new NGramTokenizer(2, 2)} and
     * checks that each of {@code "aa"}, {@code "ab"} and {@code "bb"} matches the single inserted row.
     */
    @Test
    public void testCustomAnalyzer() {
        // Publish the environment BEFORE touching the cluster. Previously the field was only assigned
        // once the whole fluent chain had returned, so any failure after createKeyspace() (including a
        // failed check) left it null and after() could not drop the keyspace.
        utils = CassandraUtils.builder("tokenizer")
                              .withPartitionKey("pk")
                              .withColumn("pk", "int")
                              .withColumn("rc", "text", textMapper().analyzer("en"))
                              .withAnalyzer("en", customAnalyzer(new NGramTokenizer(2, 2)))
                              .build();

        utils.createKeyspace()
             .createTable()
             .insert("pk,rc", 1, "aabb")
             .createIndex().refresh()
             // Sanity checks: exactly one row is indexed.
             .filter(all()).check(1)
             .filter(none()).check(0)
             // Presumably the (2, 2) arguments are min/max gram sizes, so "aabb" is split into these
             // three bigrams; verify against the NGramTokenizer builder if this expectation changes.
             .filter(match("rc", "aa")).check(1)
             .filter(match("rc", "ab")).check(1)
             .filter(match("rc", "bb")).check(1);
    }
}
Original file line number Diff line number Diff line change
@@ -44,6 +44,25 @@ public static void after() {
CassandraUtils.dropKeyspaceIfNotNull(utils);
}

// @Test
// public void testClassicTokenizer() {
// utils = CassandraUtils.builder("tokenizer")
// .withPartitionKey("pk")
// .withColumn("pk", "int")
// .withColumn("rc", "text", textMapper().analyzer("en"))
// .withAnalyzer("en", customAnalyzer(new NGramTokenizer(2,1),
// null,
// null))
// .build()
// .createKeyspace()
// .createTable()
// .insert("pk,rc", 1, "aabb")
// .createIndex().refresh()
// .filter(all()).check(1)
// .filter(none()).check(0)
// .filter(match("rc", "aa")).check(1)
// .filter(match("rc", "ab")).check(1);
// }
@Test
public void testClassicTokenizer() {
utils = CassandraUtils.builder("tokenizer")
@@ -218,7 +237,6 @@ public void testLetterTokenizer() {
.filter(phrase("rc", "The 2 QUICK Brown-Foxes jumped the")).check(1)
.filter(fuzzy("rc", "The 2 QUICK Brown-Foxes jumped the lazy dog bone. and/o")).check(0)
.filter(fuzzy("rc", "The 2 QUICK Brown-Foxes jumped the lazy dog bone.")).check(0)
//TODO: check this behaviour
.filter(contains("rc", "The 2 QUICK Brown-Foxes")).check(1)
.filter(contains("rc", "jump")).check(0)
.filter(contains("rc", "and/or")).check(1)
@@ -249,7 +267,6 @@ public void testLowerCaseTokenizer() {
.filter(phrase("rc", "The 2 QUICK Brown-Foxes jumped the")).check(1)
.filter(fuzzy("rc", "The 2 QUICK Brown-Foxes jumped the lazy dog bone. and/o")).check(0)
.filter(fuzzy("rc", "The 2 QUICK Brown-Foxes jumped the lazy dog bone.")).check(0)
//TODO: check this behaviour
.filter(contains("rc", "The 2 QUICK Brown-Foxes")).check(1)
.filter(contains("rc", "jump")).check(0)
.filter(contains("rc", "and/or")).check(1)
@@ -385,7 +402,7 @@ public void testReversePathHierarchyTokenizer() {
.withPartitionKey("pk")
.withColumn("pk", "int")
.withColumn("rc", "text", textMapper().analyzer("en"))
.withAnalyzer("en", customAnalyzer(new ReversePathHierarchyTokenizer()))
.withAnalyzer("en", customAnalyzer(new PathHierarchyTokenizer(true, '/', '/', 0)))
.build()
.createKeyspace()
.createTable()
@@ -551,7 +568,7 @@ public void testUnicodeWhiteSpaceTokenizerTokenizer() {
.withPartitionKey("pk")
.withColumn("pk", "int")
.withColumn("rc", "text", textMapper().analyzer("en"))
.withAnalyzer("en", customAnalyzer(new UnicodeWhitespaceTokenizer()))
.withAnalyzer("en", customAnalyzer(new WhitespaceTokenizer("unicode")))
.build()
.createKeyspace()
.createTable()
@@ -590,7 +607,6 @@ public void testUnicodeWhiteSpaceTokenizerTokenizer() {
.filter(fuzzy("rc", "gjumperd")).check(1)
.filter(fuzzy("rc", "dogjumperdog")).check(0)
.filter(contains("rc", "jumped")).check(1)
//TODO: check this behaviour
.filter(contains("rc", "jump")).check(0)
.filter(contains("rc", "jumper")).check(0)
.filter(contains("rc", "ajumped")).check(0)
@@ -653,7 +669,6 @@ public void testWhiteSpaceTokenizerTokenizer() {
.filter(fuzzy("rc", "gjumperd")).check(1)
.filter(fuzzy("rc", "dogjumperdog")).check(0)
.filter(contains("rc", "jumped")).check(1)
//TODO: check this behaviour
.filter(contains("rc", "jump")).check(0)
.filter(contains("rc", "jumper")).check(0)
.filter(contains("rc", "ajumped")).check(0)
@@ -707,4 +722,3 @@ public void testWikipediaTokenizerTokenizer() {
.filter(contains("rc", "sub head followed by some text")).check(1);
}
}