Add testAT CustomAnalyzer

Stratio · jpgilaberte · Apr 24, 2017 · Apr 26, 2017 · Apr 26, 2017 · Apr 28, 2017
commit c98e3fc5e83fc0d21029da4bc28fd9ce959cfdc8
diff --git a/...java/com/stratio/cassandra/lucene/testsAT/schema/analysis/tokenizer/CustomAnalyzerIT.java b/...java/com/stratio/cassandra/lucene/testsAT/schema/analysis/tokenizer/CustomAnalyzerIT.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2014 Stratio (http://stratio.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.stratio.cassandra.lucene.testsAT.schema.analysis.tokenizer;
+
+import com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenizer.NGramTokenizer;
+import com.stratio.cassandra.lucene.testsAT.BaseIT;
+import com.stratio.cassandra.lucene.testsAT.util.CassandraUtils;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import static com.stratio.cassandra.lucene.builder.Builder.*;
+import static com.stratio.cassandra.lucene.builder.Builder.match;
+
+/**
+ * Test that indexes the text column {@code rc} with a custom analyzer built from an {@link NGramTokenizer} and checks the search results. Created by jpgilaberte on 2/06/17.
+ */
+@RunWith(JUnit4.class)
+public class CustomAnalyzerIT extends BaseIT {
+    private static CassandraUtils utils; // assigned by testCustomAnalyzer(), read by after()
+
+    @BeforeClass
+    public static void before() {} // no-op: the test method builds the keyspace, table and index itself
+
+    @AfterClass
+    public static void after() {
+        CassandraUtils.dropKeyspaceIfNotNull(utils); // utils is still null if the test never completed its assignment
+    }
+
+    @Test
+    public void testCustomAnalyzer() {
+        utils = CassandraUtils.builder("tokenizer") // NOTE(review): utils is only assigned after the whole chain returns, so a failing check leaves it null for after()
+                .withPartitionKey("pk")
+                .withColumn("pk", "int")
+                .withColumn("rc", "text", textMapper().analyzer("en")) // rc refers to the analyzer registered as "en" on the next line
+                .withAnalyzer("en", customAnalyzer(new NGramTokenizer(2,2))) // presumably (minGram, maxGram) = (2, 2) - TODO confirm against NGramTokenizer
+                .build()
+                .createKeyspace()
+                .createTable()
+                .insert("pk,rc", 1, "aabb") // the single row under test
+                .createIndex().refresh()
+                .filter(all()).check(1)
+                .filter(none()).check(0)
+                .filter(match("rc", "aa")).check(1) // "aa", "ab" and "bb" are the three 2-character substrings of "aabb"
+                .filter(match("rc", "ab")).check(1)
+                .filter(match("rc", "bb")).check(1);
+    }
+}
diff --git a/...va/com/stratio/cassandra/lucene/testsAT/schema/analysis/tokenizer/TokenizerBuilderIT.java b/...va/com/stratio/cassandra/lucene/testsAT/schema/analysis/tokenizer/TokenizerBuilderIT.java
@@ -44,6 +44,25 @@ public static void after() {
         CassandraUtils.dropKeyspaceIfNotNull(utils);
     }
 
+//    @Test
+//    public void testClassicTokenizer() {
+//        utils = CassandraUtils.builder("tokenizer")
+//                .withPartitionKey("pk")
+//                .withColumn("pk", "int")
+//                .withColumn("rc", "text", textMapper().analyzer("en"))
+//                .withAnalyzer("en", customAnalyzer(new NGramTokenizer(2,1),
+//                        null,
+//                        null))
+//                .build()
+//                .createKeyspace()
+//                .createTable()
+//                .insert("pk,rc", 1, "aabb")
+//                .createIndex().refresh()
+//                .filter(all()).check(1)
+//                .filter(none()).check(0)
+//                .filter(match("rc", "aa")).check(1)
+//                .filter(match("rc", "ab")).check(1);
+//    }
     @Test
     public void testClassicTokenizer() {
         utils = CassandraUtils.builder("tokenizer")
@@ -218,7 +237,6 @@ public void testLetterTokenizer() {
                 .filter(phrase("rc", "The 2 QUICK Brown-Foxes jumped the")).check(1)
                 .filter(fuzzy("rc", "The 2 QUICK Brown-Foxes jumped the lazy dog bone. and/o")).check(0)
                 .filter(fuzzy("rc", "The 2 QUICK Brown-Foxes jumped the lazy dog bone.")).check(0)
-                //TODO: check this behaviour
                 .filter(contains("rc", "The 2 QUICK Brown-Foxes")).check(1)
                 .filter(contains("rc", "jump")).check(0)
                 .filter(contains("rc", "and/or")).check(1)
@@ -249,7 +267,6 @@ public void testLowerCaseTokenizer() {
                 .filter(phrase("rc", "The 2 QUICK Brown-Foxes jumped the")).check(1)
                 .filter(fuzzy("rc", "The 2 QUICK Brown-Foxes jumped the lazy dog bone. and/o")).check(0)
                 .filter(fuzzy("rc", "The 2 QUICK Brown-Foxes jumped the lazy dog bone.")).check(0)
-                //TODO: check this behaviour
                 .filter(contains("rc", "The 2 QUICK Brown-Foxes")).check(1)
                 .filter(contains("rc", "jump")).check(0)
                 .filter(contains("rc", "and/or")).check(1)
@@ -385,7 +402,7 @@ public void testReversePathHierarchyTokenizer() {
                 .withPartitionKey("pk")
                 .withColumn("pk", "int")
                 .withColumn("rc", "text", textMapper().analyzer("en"))
-                .withAnalyzer("en", customAnalyzer(new ReversePathHierarchyTokenizer()))
+                .withAnalyzer("en", customAnalyzer(new PathHierarchyTokenizer(true, '/', '/', 0)))
                 .build()
                 .createKeyspace()
                 .createTable()
@@ -551,7 +568,7 @@ public void testUnicodeWhiteSpaceTokenizerTokenizer() {
                 .withPartitionKey("pk")
                 .withColumn("pk", "int")
                 .withColumn("rc", "text", textMapper().analyzer("en"))
-                .withAnalyzer("en", customAnalyzer(new UnicodeWhitespaceTokenizer()))
+                .withAnalyzer("en", customAnalyzer(new WhitespaceTokenizer("unicode")))
                 .build()
                 .createKeyspace()
                 .createTable()
@@ -590,7 +607,6 @@ public void testUnicodeWhiteSpaceTokenizerTokenizer() {
                 .filter(fuzzy("rc", "gjumperd")).check(1)
                 .filter(fuzzy("rc", "dogjumperdog")).check(0)
                 .filter(contains("rc", "jumped")).check(1)
-                //TODO: check this behaviour
                 .filter(contains("rc", "jump")).check(0)
                 .filter(contains("rc", "jumper")).check(0)
                 .filter(contains("rc", "ajumped")).check(0)
@@ -653,7 +669,6 @@ public void testWhiteSpaceTokenizerTokenizer() {
                 .filter(fuzzy("rc", "gjumperd")).check(1)
                 .filter(fuzzy("rc", "dogjumperdog")).check(0)
                 .filter(contains("rc", "jumped")).check(1)
-                //TODO: check this behaviour
                 .filter(contains("rc", "jump")).check(0)
                 .filter(contains("rc", "jumper")).check(0)
                 .filter(contains("rc", "ajumped")).check(0)
@@ -707,4 +722,3 @@ public void testWikipediaTokenizerTokenizer() {
                 .filter(contains("rc", "sub head followed by some text")).check(1);
     }
 }
-