From 9c5a0bac1996e11115be64e8563144a8600d4fcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ko=C5=82aczkowski?= <pkolaczk@datastax.com>
Date: Thu, 7 Nov 2024 14:07:40 +0100
Subject: [PATCH] CNDB-11655: Limit the number of clauses before optimizing the
 Plan

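Plan#optimize can take a very long time when given plans with thousands
of intersected clauses, which can result from using ngram analyzers.
The intersection clause limit is now applied before the optimizer runs,
so the optimizer only ever sees the reduced plan.

As a consequence, the limit is applied regardless of QUERY_OPT_LEVEL:
values above 1 no longer disable it, and the previous
INTERSECTION_CLAUSE_LIMIT > 0 guard is removed.

Related issue: https://github.com/riptano/cndb/issues/10731.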

Fixes https://github.com/riptano/cndb/issues/11655
---
 .../index/sai/plan/QueryController.java       | 29 +++++++++----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java
index 7e216a3e657e..932bb8235862 100644
--- a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java
+++ b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java
@@ -105,8 +105,7 @@ public class QueryController implements Plan.Executor, Plan.CostEstimator
     /**
      * Controls whether we optimize query plans.
      * 0 disables the optimizer. As a side effect, hybrid ANN queries will default to FilterSortOrder.SCAN_THEN_FILTER.
-     * 1 enables the optimizer and tells the optimizer to respect the intersection clause limit.
-     * Higher values enable the optimizer and disable the hard intersection clause limit.
+     * Any value greater than 0 enables the optimizer.
      * Note: the config is not final to simplify testing.
      */
     @VisibleForTesting
@@ -353,34 +352,34 @@ Plan buildPlan()
         rowsIteration = planFactory.recheckFilter(command.rowFilter(), rowsIteration);
         rowsIteration = planFactory.limit(rowsIteration, command.limits().rows());
 
-        Plan optimizedPlan;
-        optimizedPlan = QUERY_OPT_LEVEL > 0
-                        ? rowsIteration.optimize()
-                        : rowsIteration;
-        optimizedPlan = RangeIntersectionIterator.INTERSECTION_CLAUSE_LIMIT > 0 && QUERY_OPT_LEVEL <= 1
-                        ? optimizedPlan.limitIntersectedClauses(RangeIntersectionIterator.INTERSECTION_CLAUSE_LIMIT)
-                        : optimizedPlan;
+        // Apply the intersection clause limit before optimizing: this shrinks the plan handed to the
+        // optimizer, which reduces the plan search space and speeds up optimization.
+        // Some index operators like ':' can expand to a huge number of MATCH predicates
+        // (see CNDB-10731), which could otherwise overload the optimizer.
+        Plan plan = rowsIteration.limitIntersectedClauses(RangeIntersectionIterator.INTERSECTION_CLAUSE_LIMIT);
+        if (QUERY_OPT_LEVEL > 0)
+            plan = plan.optimize();
 
-        if (optimizedPlan.contains(node -> node instanceof Plan.AnnIndexScan))
+        if (plan.contains(node -> node instanceof Plan.AnnIndexScan))
             queryContext.setFilterSortOrder(QueryContext.FilterSortOrder.SCAN_THEN_FILTER);
-        if (optimizedPlan.contains(node -> node instanceof Plan.KeysSort))
+        if (plan.contains(node -> node instanceof Plan.KeysSort))
             queryContext.setFilterSortOrder(QueryContext.FilterSortOrder.SEARCH_THEN_ORDER);
 
         if (logger.isTraceEnabled())
-            logger.trace("Query execution plan:\n" + optimizedPlan.toStringRecursive());
+            logger.trace("Query execution plan:\n" + plan.toStringRecursive());
 
         if (Tracing.isTracing())
         {
-            Tracing.trace("Query execution plan:\n" + optimizedPlan.toStringRecursive());
+            Tracing.trace("Query execution plan:\n" + plan.toStringRecursive());
             List<Plan.IndexScan> origIndexScans = keysIterationPlan.nodesOfType(Plan.IndexScan.class);
-            List<Plan.IndexScan> selectedIndexScans = optimizedPlan.nodesOfType(Plan.IndexScan.class);
+            List<Plan.IndexScan> selectedIndexScans = plan.nodesOfType(Plan.IndexScan.class);
             Tracing.trace("Selecting {} {} of {} out of {} indexes",
                           selectedIndexScans.size(),
                           selectedIndexScans.size() > 1 ? "indexes with cardinalities" : "index with cardinality",
                           selectedIndexScans.stream().map(s -> "" + ((long) s.expectedKeys())).collect(Collectors.joining(", ")),
                           origIndexScans.size());
         }
-        return optimizedPlan;
+        return plan;
     }
 
     private Plan.KeysIteration buildKeysIterationPlan()