Merge pull request #2982 from vespa-engine/kkraune/indexing-rewrite

Indexing-rewrite was deprecated and removed some time ago
vespa-engine · Nov 13, 2023 · dab4d7a · dab4d7a
2 parents f92671c + d58527d
commit dab4d7a
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 39 deletions.
diff --git a/en/linguistics.html b/en/linguistics.html
@@ -323,16 +323,12 @@ <h3 id="multiple-languages">Multiple languages</h3>
 
 <h2 id="tokenization">Tokenization</h2>
 <p>
-Tokenization removes any non-word characters,
-and splits the string into <em>tokens</em> on each word boundary.
-In addition, CJK tokens are split using a <em>segmentation</em> algorithm.
-The resulting tokens are then searchable in the index.
-</p><p>
-To index strings as-is (that is, avoid tokenization),
-use <a href="reference/schema-reference.html#indexing-rewrite">indexing-rewrite</a>: none.
-</p><p>
-Also see <a href="reference/schema-reference.html#gram">N-gram matching</a>.
+  Tokenization removes any non-word characters,
+  and splits the string into <em>tokens</em> on each word boundary.
+  In addition, CJK tokens are split using a <em>segmentation</em> algorithm.
+  The resulting tokens are then searchable in the index.
 </p>
+<p>Also see <a href="reference/schema-reference.html#gram">N-gram matching</a>.</p>
 
 
 

diff --git a/en/reference/schema-reference.html b/en/reference/schema-reference.html
@@ -87,7 +87,6 @@ <h2 id="elements">Elements</h2>
             <a href="#index">index</a>
                 <a href="#index-hnsw">hnsw</a>
             <a href="#indexing">indexing</a>
-            <a href="#indexing-rewrite">indexing-rewrite</a>
             <a href="#match">match</a>
             <a href="#normalizing">normalizing</a>
             <a href="#query-command">query-command</a>
@@ -1155,12 +1154,6 @@ <h2 id="field">field</h2>
     from this field.</p></td>
 </tr>
 
-<tr><td style="white-space: nowrap"><a href="#indexing-rewrite">indexing-rewrite</a></td>
-  <td>Zero to one</td>
-  <td><p id="field-indexing-rewrite">Determines the rewriting Vespa is allowed to do on the indexing
-    statements of this field.</p></td>
-</tr>
-
 <tr><td><a href="#match">match</a></td>
   <td>Zero to one</td>
   <td><p id="field-match">Set the matching type to use for this field.</p></td>
@@ -3398,27 +3391,6 @@ <h2 id="indexing">indexing</h2>
 
 
 
-<h2 id="indexing-rewrite">indexing-rewrite</h2>
-<p>Contained in <a href="#field">field</a>.
-Vespa will normally rewrite indexing statements extensively to
-implement the technical tasks which are required to carry out the
-intentions of the indexing statement. The rewriting done can be
-controlled using this element.
-<pre>
-indexing-rewrite: none
-</pre>
-<p>
-Include this to let an indexing statement pass through
-unaltered. Note that such statements must begin with an
-<code>input &lt;fieldname&gt;</code>, <code>get_var</code> or
-constant expression. You should understand which rewrites Vespa
-does, and be certain that your indexing statement can do without them
-to use this. This statement must be placed somewhere below the
-<code>indexing</code> statement in the field.
-</p>
-
-
-
 <h2 id="match">match</h2>
 <p>Contained in <a href="#field">field</a>, <a href="#fieldset">fieldset</a> or
 <a href="#struct-field">struct-field</a>.

diff --git a/questions.jsonl b/questions.jsonl
@@ -753,7 +753,7 @@
 {"update": "id:open-p:paragraph::open/en/linguistics.html-querying-with-language", "fields": {"questions": {"assign": ["What is an indexed string field?", "What is symmetric tokenization?", "How does the query parser work?", "What controls the language state?", "Can multiple languages be in one index?", "How to store the language of a document?", "Why apply a filter at query-time?", "What does the language parameter affect?"]}}}
 {"update": "id:open-p:paragraph::open/en/linguistics.html-query-language-detection", "fields": {"questions": {"assign": ["What is language parameter in Vespa?", "How does language detector process queries?", "What is the confidence cutoff for language detection?"]}}}
 {"update": "id:open-p:paragraph::open/en/linguistics.html-multiple-languages", "fields": {"questions": {"assign": ["What are the approaches for retrieval?", "What is the benefit of equiv?", "What is Sentence-Bert?"]}}}
-{"update": "id:open-p:paragraph::open/en/linguistics.html-tokenization", "fields": {"questions": {"assign": ["What is tokenization?", "What is CJK segmentation?", "What is indexing-rewrite?", "What is N-gram matching?"]}}}
+{"update": "id:open-p:paragraph::open/en/linguistics.html-tokenization", "fields": {"questions": {"assign": ["What is tokenization?", "What is CJK segmentation?", "What is N-gram matching?"]}}}
 {"update": "id:open-p:paragraph::open/en/linguistics.html-normalization", "fields": {"questions": {"assign": ["What is text normalization?", "How does Vespa normalize text?", "What is the nfkc query term annotation?", "What is accentDrop annotation?", "What does normalization preserve?", "Which library does Vespa use for text normalization?", "What are the benefits of normalization?", "What kind of decorations are normalized by Vespa?"]}}}
 {"update": "id:open-p:paragraph::open/en/linguistics.html-stemming", "fields": {"questions": {"assign": ["What is stemming?", "What is a stemmer?", "How does stemming work?", "What is symmetric stemming?", "What is the purpose of stemming?", "What is bolding?", "How does bolding work?", "What is the stem query term?"]}}}
 {"update": "id:open-p:paragraph::open/en/linguistics.html-theory", "fields": {"questions": {"assign": ["What are equivalence classes in Vespa?", "What is the problem with low-level linguistics library in Vespa?", "How does multiple stemming help with recall?"]}}}
@@ -1342,7 +1342,6 @@
 {"update": "id:open-p:paragraph::open/en/reference/schema-reference.html-index", "fields": {"questions": {"assign": ["What are index parameters?", "What is an alias in index?", "What is stemming setting?", "What is dense posting list threshold value?"]}}}
 {"update": "id:open-p:paragraph::open/en/reference/schema-reference.html-hnsw", "fields": {"questions": {"assign": ["What is an HNSW index?", "What query operator uses HNSW?", "What are the parameters of HNSW?", "What is the default value of max-links-per-node parameter?", "What is the default value of neighbors-to-explore-at-insert parameter?", "Where can I find examples of using HNSW in Vespa?", "Why did Vespa team select HNSW as the baseline algorithm?"]}}}
 {"update": "id:open-p:paragraph::open/en/reference/schema-reference.html-indexing", "fields": {"questions": {"assign": ["What is the indexing language?", "What does index statement do?"]}}}
-{"update": "id:open-p:paragraph::open/en/reference/schema-reference.html-indexing-rewrite", "fields": {"questions": {"assign": ["What is indexing-rewrite?", "What does the 'none' option do?"]}}}
 {"update": "id:open-p:paragraph::open/en/reference/schema-reference.html-match", "fields": {"questions": {"assign": ["What is match type \"text\"?", "What is match type \"word\"?", "What is match type \"exact\"?", "What is match type \"cased\"?", "What is match type \"uncased\"?", "What is match type \"max-length\"?", "What is match type \"gram\"?"]}}}
 {"update": "id:open-p:paragraph::open/en/reference/schema-reference.html-rank", "fields": {"questions": {"assign": ["What is 'filter' ranking setting?"]}}}
 {"update": "id:open-p:paragraph::open/en/reference/schema-reference.html-query-command", "fields": {"questions": {"assign": ["What is query-command?", "What is the built-in query-command available?"]}}}