From 9c5a0bac1996e11115be64e8563144a8600d4fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Ko=C5=82aczkowski?= Date: Thu, 7 Nov 2024 14:07:40 +0100 Subject: [PATCH] CNDB-11655: Limit the number of clauses before optimizing the Plan Plan#optimize can take a very long time when given plans with thousands of intersected clauses, which can result from using ngram analyzers. Related issue: https://github.com/riptano/cndb/issues/10731. Fixes https://github.com/riptano/cndb/issues/11655 --- .../index/sai/plan/QueryController.java | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java index 7e216a3e657e..932bb8235862 100644 --- a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java +++ b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java @@ -105,8 +105,7 @@ public class QueryController implements Plan.Executor, Plan.CostEstimator /** * Controls whether we optimize query plans. * 0 disables the optimizer. As a side effect, hybrid ANN queries will default to FilterSortOrder.SCAN_THEN_FILTER. - * 1 enables the optimizer and tells the optimizer to respect the intersection clause limit. - * Higher values enable the optimizer and disable the hard intersection clause limit. + * 1 enables the optimizer. * Note: the config is not final to simplify testing. */ @VisibleForTesting @@ -353,34 +352,34 @@ Plan buildPlan() rowsIteration = planFactory.recheckFilter(command.rowFilter(), rowsIteration); rowsIteration = planFactory.limit(rowsIteration, command.limits().rows()); - Plan optimizedPlan; - optimizedPlan = QUERY_OPT_LEVEL > 0 - ? rowsIteration.optimize() - : rowsIteration; - optimizedPlan = RangeIntersectionIterator.INTERSECTION_CLAUSE_LIMIT > 0 && QUERY_OPT_LEVEL <= 1 - ? optimizedPlan.limitIntersectedClauses(RangeIntersectionIterator.INTERSECTION_CLAUSE_LIMIT) - : optimizedPlan; + // We apply the intersection clause limit first before optimizing so we reduce the size of the + // plan given to the optimizer and hence we reduce the plan search space and speed up optimization. + // It is possible that some index operators like ':' expand to a huge number of MATCH predicates + // (see CNDB-10731) and could overload the optimizer. + Plan plan = rowsIteration.limitIntersectedClauses(RangeIntersectionIterator.INTERSECTION_CLAUSE_LIMIT); + if (QUERY_OPT_LEVEL > 0) + plan = plan.optimize(); - if (optimizedPlan.contains(node -> node instanceof Plan.AnnIndexScan)) + if (plan.contains(node -> node instanceof Plan.AnnIndexScan)) queryContext.setFilterSortOrder(QueryContext.FilterSortOrder.SCAN_THEN_FILTER); - if (optimizedPlan.contains(node -> node instanceof Plan.KeysSort)) + if (plan.contains(node -> node instanceof Plan.KeysSort)) queryContext.setFilterSortOrder(QueryContext.FilterSortOrder.SEARCH_THEN_ORDER); if (logger.isTraceEnabled()) - logger.trace("Query execution plan:\n" + optimizedPlan.toStringRecursive()); + logger.trace("Query execution plan:\n" + plan.toStringRecursive()); if (Tracing.isTracing()) { - Tracing.trace("Query execution plan:\n" + optimizedPlan.toStringRecursive()); + Tracing.trace("Query execution plan:\n" + plan.toStringRecursive()); List origIndexScans = keysIterationPlan.nodesOfType(Plan.IndexScan.class); - List selectedIndexScans = optimizedPlan.nodesOfType(Plan.IndexScan.class); + List selectedIndexScans = plan.nodesOfType(Plan.IndexScan.class); Tracing.trace("Selecting {} {} of {} out of {} indexes", selectedIndexScans.size(), selectedIndexScans.size() > 1 ? "indexes with cardinalities" : "index with cardinality", selectedIndexScans.stream().map(s -> "" + ((long) s.expectedKeys())).collect(Collectors.joining(", ")), origIndexScans.size()); } - return optimizedPlan; + return plan; } private Plan.KeysIteration buildKeysIterationPlan()