diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 79a3783f86..c02e769bdd 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -164,6 +164,14 @@ std::shared_ptr QueryExecutionTree::createSortedTree( return qet; } + // Unwrap sort to avoid stacking sorts on top of each other. + if (auto sort = std::dynamic_pointer_cast(qet->getRootOperation())) { + AD_LOG_WARN << "Tried to re-sort a subtree that will already be sorted " + "with `Sort` with a different sort order. This is a bug." + << std::endl; + qet = sort->getSubtree(); + } + QueryExecutionContext* qec = qet->getRootOperation()->getExecutionContext(); auto sort = std::make_shared(qec, std::move(qet), sortColumns); return std::make_shared(qec, std::move(sort)); diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 9785710f91..20ac1ec47b 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -56,17 +56,17 @@ namespace p = parsedQuery; namespace { using ad_utility::makeExecutionTree; +using SubtreePlan = QueryPlanner::SubtreePlan; template -QueryPlanner::SubtreePlan makeSubtreePlan(QueryExecutionContext* qec, - auto&&... args) { +SubtreePlan makeSubtreePlan(QueryExecutionContext* qec, auto&&... args) { return {qec, std::make_shared(qec, AD_FWD(args)...)}; } // Create a `SubtreePlan` that holds the given `operation`. `Op` must be a class // inheriting from `Operation`. template -QueryPlanner::SubtreePlan makeSubtreePlan(std::shared_ptr operation) { +SubtreePlan makeSubtreePlan(std::shared_ptr operation) { auto* qec = operation->getExecutionContext(); return {qec, std::move(operation)}; } @@ -74,9 +74,8 @@ QueryPlanner::SubtreePlan makeSubtreePlan(std::shared_ptr operation) { // Update the `target` query plan such that it knows that it includes all the // nodes and filters from `a` and `b`. NOTE: This does not actually merge // the plans from `a` and `b`. 
-void mergeSubtreePlanIds(QueryPlanner::SubtreePlan& target, - const QueryPlanner::SubtreePlan& a, - const QueryPlanner::SubtreePlan& b) { +void mergeSubtreePlanIds(SubtreePlan& target, const SubtreePlan& a, + const SubtreePlan& b) { target._idsOfIncludedNodes = a._idsOfIncludedNodes | b._idsOfIncludedNodes; target._idsOfIncludedFilters = a._idsOfIncludedFilters | b._idsOfIncludedFilters; @@ -93,8 +92,8 @@ QueryPlanner::QueryPlanner(QueryExecutionContext* qec, } // _____________________________________________________________________________ -std::vector QueryPlanner::createExecutionTrees( - ParsedQuery& pq, bool isSubquery) { +std::vector QueryPlanner::createExecutionTrees(ParsedQuery& pq, + bool isSubquery) { // Store the dataset clause (FROM and FROM NAMED clauses), s.t. we have access // to them down the callstack. Subqueries can't have their own dataset clause, // but inherit it from the parent query. @@ -227,7 +226,7 @@ QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq, } // _____________________________________________________________________________ -std::vector QueryPlanner::optimize( +std::vector QueryPlanner::optimize( ParsedQuery::GraphPattern* rootPattern) { QueryPlanner::GraphPatternPlanner optimizer{*this, rootPattern}; for (auto& child : rootPattern->_graphPatterns) { @@ -272,7 +271,7 @@ std::vector QueryPlanner::optimize( } // _____________________________________________________________________________ -vector QueryPlanner::getDistinctRow( +vector QueryPlanner::getDistinctRow( const p::SelectClause& selectClause, const vector>& dpTab) const { const vector& previous = dpTab[dpTab.size() - 1]; @@ -315,7 +314,7 @@ vector QueryPlanner::getDistinctRow( } // _____________________________________________________________________________ -vector QueryPlanner::getPatternTrickRow( +vector QueryPlanner::getPatternTrickRow( const p::SelectClause& selectClause, const vector>& dpTab, const checkUsePatternTrick::PatternTrickTuple& 
patternTrickTuple) { @@ -346,7 +345,7 @@ vector QueryPlanner::getPatternTrickRow( } // _____________________________________________________________________________ -vector QueryPlanner::getHavingRow( +vector QueryPlanner::getHavingRow( const ParsedQuery& pq, const vector>& dpTab) const { const vector& previous = dpTab[dpTab.size() - 1]; vector added; @@ -363,7 +362,7 @@ vector QueryPlanner::getHavingRow( } // _____________________________________________________________________________ -vector QueryPlanner::getGroupByRow( +vector QueryPlanner::getGroupByRow( const ParsedQuery& pq, const vector>& dpTab) const { const vector& previous = dpTab[dpTab.size() - 1]; vector added; @@ -398,7 +397,7 @@ vector QueryPlanner::getGroupByRow( } // _____________________________________________________________________________ -vector QueryPlanner::getOrderByRow( +vector QueryPlanner::getOrderByRow( const ParsedQuery& pq, const vector>& dpTab) const { const vector& previous = dpTab[dpTab.size() - 1]; vector added; @@ -725,8 +724,8 @@ void QueryPlanner::seedFromOrdinaryTriple( // _____________________________________________________________________________ auto QueryPlanner::seedWithScansAndText( const QueryPlanner::TripleGraph& tg, - const vector>& children, - TextLimitMap& textLimits) -> PlansAndFilters { + const vector>& children, TextLimitMap& textLimits) + -> PlansAndFilters { PlansAndFilters result; vector& seeds = result.plans_; // add all child plans as seeds @@ -989,7 +988,7 @@ Variable QueryPlanner::generateUniqueVarName() { } // _____________________________________________________________________________ -QueryPlanner::SubtreePlan QueryPlanner::getTextLeafPlan( +SubtreePlan QueryPlanner::getTextLeafPlan( const QueryPlanner::TripleGraph::Node& node, TextLimitMap& textLimits) const { AD_CONTRACT_CHECK(node.wordPart_.has_value()); @@ -1026,9 +1025,8 @@ QueryPlanner::SubtreePlan QueryPlanner::getTextLeafPlan( } // 
_____________________________________________________________________________ -vector QueryPlanner::merge( - const vector& a, - const vector& b, +vector QueryPlanner::merge( + const vector& a, const vector& b, const QueryPlanner::TripleGraph& tg) const { // TODO: Add the following features: // If a join is supposed to happen, always check if it happens between @@ -1109,18 +1107,13 @@ string QueryPlanner::TripleGraph::asString() const { } // _____________________________________________________________________________ -size_t QueryPlanner::SubtreePlan::getCostEstimate() const { - return _qet->getCostEstimate(); -} +size_t SubtreePlan::getCostEstimate() const { return _qet->getCostEstimate(); } // _____________________________________________________________________________ -size_t QueryPlanner::SubtreePlan::getSizeEstimate() const { - return _qet->getSizeEstimate(); -} +size_t SubtreePlan::getSizeEstimate() const { return _qet->getSizeEstimate(); } // _____________________________________________________________________________ -bool QueryPlanner::connected(const QueryPlanner::SubtreePlan& a, - const QueryPlanner::SubtreePlan& b, +bool QueryPlanner::connected(const SubtreePlan& a, const SubtreePlan& b, const QueryPlanner::TripleGraph& tg) const { // Check if there is overlap. // If so, don't consider them as properly connected. 
@@ -1149,15 +1142,15 @@ bool QueryPlanner::connected(const QueryPlanner::SubtreePlan& a, } // _____________________________________________________________________________ -std::vector> QueryPlanner::getJoinColumns( - const SubtreePlan& a, const SubtreePlan& b) { +QueryPlanner::JoinColumns QueryPlanner::getJoinColumns(const SubtreePlan& a, + const SubtreePlan& b) { AD_CORRECTNESS_CHECK(a._qet && b._qet); return QueryExecutionTree::getJoinColumns(*a._qet, *b._qet); } // _____________________________________________________________________________ string QueryPlanner::getPruningKey( - const QueryPlanner::SubtreePlan& plan, + const SubtreePlan& plan, const vector& orderedOnColumns) const { // Get the ordered var std::ostringstream os; @@ -1183,8 +1176,7 @@ string QueryPlanner::getPruningKey( // _____________________________________________________________________________ template void QueryPlanner::applyFiltersIfPossible( - vector& row, - const vector& filters) const { + vector& row, const vector& filters) const { // Apply every filter possible. // It is possible when, // 1) the filter has not already been applied @@ -1237,9 +1229,9 @@ void QueryPlanner::applyFiltersIfPossible( } // _____________________________________________________________________________ -void QueryPlanner::applyTextLimitsIfPossible( - vector& row, const TextLimitVec& textLimits, - bool replace) const { +void QueryPlanner::applyTextLimitsIfPossible(vector& row, + const TextLimitVec& textLimits, + bool replace) const { // Apply text limits if possible. // A text limit can be applied to a plan if: // 1) There is no text operation for the text record column left. 
@@ -1310,12 +1302,12 @@ size_t QueryPlanner::findUniqueNodeIds( } // _____________________________________________________________________________ -std::vector +std::vector QueryPlanner::runDynamicProgrammingOnConnectedComponent( std::vector connectedComponent, const vector& filters, const TextLimitVec& textLimits, const TripleGraph& tg) const { - vector> dpTab; + vector> dpTab; // find the unique number of nodes in the current connected component // (there might be duplicates because we already have multiple candidates // for each index scan with different permutations. @@ -1346,9 +1338,9 @@ QueryPlanner::runDynamicProgrammingOnConnectedComponent( } // _____________________________________________________________________________ -size_t QueryPlanner::countSubgraphs( - std::vector graph, - const std::vector& filters, size_t budget) { +size_t QueryPlanner::countSubgraphs(std::vector graph, + const std::vector& filters, + size_t budget) { // Remove duplicate plans from `graph`. auto getId = [](const SubtreePlan* v) { return v->_idsOfIncludedNodes; }; ql::ranges::sort(graph, ql::ranges::less{}, getId); @@ -1365,7 +1357,7 @@ size_t QueryPlanner::countSubgraphs( // is contained in the `filters`, because this will bring the estimate of this // function closer to the actual behavior of the DP query planner (it always // applies either all possible filters at once, or none of them). 
- std::vector dummyPlansForFilter; + std::vector dummyPlansForFilter; ad_utility::HashSet> deduplicatedFilterVariables; for (const auto& filter : filters) { @@ -1418,8 +1410,7 @@ size_t QueryPlanner::countSubgraphs( } // _____________________________________________________________________________ -std::vector -QueryPlanner::runGreedyPlanningOnConnectedComponent( +std::vector QueryPlanner::runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, const vector& filters, const TextLimitVec& textLimits, const TripleGraph& tg) const { @@ -1493,10 +1484,9 @@ QueryPlanner::runGreedyPlanningOnConnectedComponent( } // _____________________________________________________________________________ -vector> QueryPlanner::fillDpTab( +vector> QueryPlanner::fillDpTab( const QueryPlanner::TripleGraph& tg, vector filters, - TextLimitMap& textLimits, - const vector>& children) { + TextLimitMap& textLimits, const vector>& children) { auto [initialPlans, additionalFilters] = seedWithScansAndText(tg, children, textLimits); ql::ranges::move(additionalFilters, std::back_inserter(filters)); @@ -1884,7 +1874,7 @@ size_t QueryPlanner::findSmallestExecutionTree( }; // _____________________________________________________________________________ -std::vector QueryPlanner::createJoinCandidates( +std::vector QueryPlanner::createJoinCandidates( const SubtreePlan& ain, const SubtreePlan& bin, boost::optional tg) const { bool swapForTesting = isInTestMode() && bin.type != SubtreePlan::OPTIONAL && @@ -1896,7 +1886,7 @@ std::vector QueryPlanner::createJoinCandidates( // TODO find out, what is ACTUALLY the use case for the triple // graph. Is it only meant for (questionable) performance reasons // or does it change the meaning. 
- std::vector> jcs; + JoinColumns jcs; if (tg) { if (connected(a, b, *tg)) { jcs = getJoinColumns(a, b); @@ -1905,6 +1895,19 @@ std::vector QueryPlanner::createJoinCandidates( jcs = getJoinColumns(a, b); } + return createJoinCandidates(ain, bin, jcs); +} + +// _____________________________________________________________________________ +std::vector QueryPlanner::createJoinCandidates( + const SubtreePlan& ain, const SubtreePlan& bin, + const JoinColumns& jcs) const { + bool swapForTesting = isInTestMode() && bin.type != SubtreePlan::OPTIONAL && + ain._qet->getCacheKey() < bin._qet->getCacheKey(); + const auto& a = !swapForTesting ? ain : bin; + const auto& b = !swapForTesting ? bin : ain; + std::vector candidates; + if (jcs.empty()) { // The candidates are not connected return candidates; @@ -1975,6 +1978,12 @@ std::vector QueryPlanner::createJoinCandidates( candidates.push_back(std::move(opt.value())); } + // Test if one of `a` or `b` is a union whose children can each have the joins + // applied individually. + for (SubtreePlan& plan : applyJoinDistributivelyToUnion(a, b, jcs)) { + candidates.push_back(std::move(plan)); + } + // Test if one of `a` or `b` is a transitive path to which we can bind the // other one. 
if (auto opt = createJoinWithTransitivePath(a, b, jcs)) { @@ -2003,9 +2012,8 @@ std::pair QueryPlanner::checkSpatialJoin(const SubtreePlan& a, } // _____________________________________________________________________________ -auto QueryPlanner::createSpatialJoin( - const SubtreePlan& a, const SubtreePlan& b, - const std::vector>& jcs) +auto QueryPlanner::createSpatialJoin(const SubtreePlan& a, const SubtreePlan& b, + const JoinColumns& jcs) -> std::optional { auto [aIs, bIs] = checkSpatialJoin(a, b); @@ -2040,10 +2048,124 @@ auto QueryPlanner::createSpatialJoin( return plan; } +// _____________________________________________________________________________________________________________________ + +namespace { +// Helper function that maps the indices from the unions' columns to the +// children's columns if possible. Otherwise the entry in `jcs` is dropped. +std::pair +mapColumnsInUnion(size_t columnIndex, const Union& unionOperation, + const QueryPlanner::JoinColumns& jcs) { + QueryPlanner::JoinColumns leftMapping; + leftMapping.reserve(jcs.size()); + QueryPlanner::JoinColumns rightMapping; + rightMapping.reserve(jcs.size()); + auto mapColumns = [columnIndex, &unionOperation]( + bool isLeft, std::array columns) + -> std::optional> { + ColumnIndex& column = columns.at(columnIndex); + auto tmp = unionOperation.getOriginalColumn(isLeft, column); + if (tmp.has_value()) { + column = tmp.value(); + return columns; + } + return std::nullopt; + }; + for (const auto& joinColumns : jcs) { + if (auto mappedColumn = mapColumns(true, joinColumns)) { + leftMapping.push_back(mappedColumn.value()); + } + if (auto mappedColumn = mapColumns(false, joinColumns)) { + rightMapping.push_back(mappedColumn.value()); + } + } + return {std::move(leftMapping), std::move(rightMapping)}; +} + +// Helper function that clones a SubtreePlan with a new QueryExecutionTree. 
+SubtreePlan cloneWithNewTree(const SubtreePlan& plan, + std::shared_ptr newTree) { + SubtreePlan newPlan = plan; + newPlan._qet = std::move(newTree); + return newPlan; +} + +// Check if an unbound transitive path is somewhere in the tree. This is because +// the optimization with `Union` currently only makes sense if there is a +// transitive path in the tree that benefits from directly applying the join. +bool hasUnboundTransitivePathInTree(const Operation& operation) { + if (auto* transitivePath = + dynamic_cast(&operation)) { + return !transitivePath->isBoundOrId(); + } + // Only check `UNION`s for children. + if (!dynamic_cast(&operation)) { + return false; + } + return ql::ranges::any_of( + operation.getChildren(), [](const QueryExecutionTree* child) { + return hasUnboundTransitivePathInTree(*child->getRootOperation()); + }); +} +} // namespace + +// _____________________________________________________________________________________________________________________ +auto QueryPlanner::applyJoinDistributivelyToUnion(const SubtreePlan& a, + const SubtreePlan& b, + const JoinColumns& jcs) const + -> std::vector { + AD_CORRECTNESS_CHECK(jcs.size() == 1); + AD_CORRECTNESS_CHECK(a.type == SubtreePlan::BASIC && + b.type == SubtreePlan::BASIC); + std::vector candidates{}; + auto findCandidates = [this, &candidates, &jcs](const SubtreePlan& thisPlan, + const SubtreePlan& other, + bool flipped) { + auto unionOperation = + std::dynamic_pointer_cast(thisPlan._qet->getRootOperation()); + if (!unionOperation || !hasUnboundTransitivePathInTree(*unionOperation)) { + return; + } + + auto findJoinCandidates = [this, flipped](const SubtreePlan& plan1, + const SubtreePlan& plan2, + const JoinColumns& jcs) { + if (jcs.empty()) { + return std::vector{makeSubtreePlan( + _qec, std::vector{plan1._qet, plan2._qet})}; + } + return createJoinCandidates(flipped ? plan2 : plan1, + flipped ? 
plan1 : plan2, jcs); + }; + + auto [leftMapping, rightMapping] = + mapColumnsInUnion(flipped, *unionOperation, jcs); + + auto joinedLeft = findJoinCandidates( + cloneWithNewTree(thisPlan, unionOperation->leftChild()), other, + leftMapping); + auto joinedRight = findJoinCandidates( + cloneWithNewTree(thisPlan, unionOperation->rightChild()), + cloneWithNewTree(other, other._qet->clone()), rightMapping); + + for (const auto& leftPlan : joinedLeft) { + for (const auto& rightPlan : joinedRight) { + SubtreePlan candidate = + makeSubtreePlan(_qec, leftPlan._qet, rightPlan._qet); + mergeSubtreePlanIds(candidate, thisPlan, other); + candidates.push_back(std::move(candidate)); + } + } + }; + findCandidates(a, b, false); + findCandidates(b, a, true); + return candidates; +} + // __________________________________________________________________________________________________________________ -auto QueryPlanner::createJoinWithTransitivePath( - SubtreePlan a, SubtreePlan b, - const std::vector>& jcs) +auto QueryPlanner::createJoinWithTransitivePath(const SubtreePlan& a, + const SubtreePlan& b, + const JoinColumns& jcs) -> std::optional { auto aTransPath = std::dynamic_pointer_cast( a._qet->getRootOperation()); @@ -2086,9 +2208,9 @@ auto QueryPlanner::createJoinWithTransitivePath( } // ______________________________________________________________________________________ -auto QueryPlanner::createJoinWithHasPredicateScan( - SubtreePlan a, SubtreePlan b, - const std::vector>& jcs) +auto QueryPlanner::createJoinWithHasPredicateScan(const SubtreePlan& a, + const SubtreePlan& b, + const JoinColumns& jcs) -> std::optional { // Check if one of the two operations is a HAS_PREDICATE_SCAN. 
// If the join column corresponds to the has-predicate scan's @@ -2125,9 +2247,9 @@ auto QueryPlanner::createJoinWithHasPredicateScan( } // _____________________________________________________________________ -auto QueryPlanner::createJoinWithPathSearch( - const SubtreePlan& a, const SubtreePlan& b, - const std::vector>& jcs) +auto QueryPlanner::createJoinWithPathSearch(const SubtreePlan& a, + const SubtreePlan& b, + const JoinColumns& jcs) -> std::optional { auto aRootOp = std::dynamic_pointer_cast(a._qet->getRootOperation()); @@ -2378,7 +2500,6 @@ void QueryPlanner::GraphPatternPlanner::visitGroupOptionalOrMinus( template void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) { using T = std::decay_t; - using SubtreePlan = QueryPlanner::SubtreePlan; if constexpr (std::is_same_v || std::is_same_v) { // If this is a `GRAPH {...}` clause, then we have to overwrite the diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index a34b6b6845..9875d21047 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -32,6 +32,8 @@ class QueryPlanner { std::optional activeGraphVariable_; public: + using JoinColumns = std::vector>; + explicit QueryPlanner(QueryExecutionContext* qec, CancellationHandle cancellationHandle); @@ -330,17 +332,31 @@ class QueryPlanner { const vector& b, const TripleGraph& tg) const; - std::vector createJoinCandidates( + // Create `SubtreePlan`s that join `a` and `b` together. The columns are + // computed automatically. + std::vector createJoinCandidates( const SubtreePlan& a, const SubtreePlan& b, boost::optional tg) const; + // Create `SubtreePlan`s that join `a` and `b` together. The columns are + // configured by `jcs`. 
+ std::vector createJoinCandidates(const SubtreePlan& a, + const SubtreePlan& b, + const JoinColumns& jcs) const; + + // Whenever a join is applied to a `Union`, add candidates that try applying + // join to the children of the union directly, which can be more efficient if + // one of the children has an optimized join, which can happen for + // `TransitivePath` for example. + std::vector applyJoinDistributivelyToUnion( + const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs) const; + // Used internally by `createJoinCandidates`. If `a` or `b` is a transitive // path operation and the other input can be bound to this transitive path // (see `TransitivePath.cpp` for details), then returns that bound transitive - // path. Else returns `std::nullopt` + // path. Else returns `std::nullopt`. static std::optional createJoinWithTransitivePath( - SubtreePlan a, SubtreePlan b, - const std::vector>& jcs); + const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs); // Used internally by `createJoinCandidates`. If `a` or `b` is a // `HasPredicateScan` with a variable as a subject (`?x ql:has-predicate @@ -348,12 +364,10 @@ class QueryPlanner { // then returns a `HasPredicateScan` that takes the other input as a subtree. // Else returns `std::nullopt`. 
static std::optional createJoinWithHasPredicateScan( - SubtreePlan a, SubtreePlan b, - const std::vector>& jcs); + const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs); static std::optional createJoinWithPathSearch( - const SubtreePlan& a, const SubtreePlan& b, - const std::vector>& jcs); + const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs); // Helper that returns `true` for each of the subtree plans `a` and `b` iff // the subtree plan is a spatial join and it is not yet fully constructed @@ -364,9 +378,9 @@ class QueryPlanner { // if one of the inputs is a spatial join which is compatible with the other // input, then add that other input to the spatial join as a child instead of // creating a normal join. - static std::optional createSpatialJoin( - const SubtreePlan& a, const SubtreePlan& b, - const std::vector>& jcs); + static std::optional createSpatialJoin(const SubtreePlan& a, + const SubtreePlan& b, + const JoinColumns& jcs); vector getOrderByRow( const ParsedQuery& pq, @@ -391,8 +405,7 @@ class QueryPlanner { bool connected(const SubtreePlan& a, const SubtreePlan& b, const TripleGraph& graph) const; - static std::vector> getJoinColumns( - const SubtreePlan& a, const SubtreePlan& b); + static JoinColumns getJoinColumns(const SubtreePlan& a, const SubtreePlan& b); string getPruningKey(const SubtreePlan& plan, const vector& orderedOnColumns) const; diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h index ed907c30e1..380cb644a9 100644 --- a/src/engine/TransitivePathImpl.h +++ b/src/engine/TransitivePathImpl.h @@ -26,7 +26,7 @@ struct TableColumnWithVocab { // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103909 for more info. 
TableColumnWithVocab(const IdTable* table, ColumnType column, LocalVocab vocab) - : table_{table}, column_{std::move(column)}, vocab_{std::move(vocab)} {}; + : table_{table}, column_{std::move(column)}, vocab_{std::move(vocab)} {} }; }; // namespace detail @@ -50,7 +50,7 @@ class TransitivePathImpl : public TransitivePathBase { TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, size_t maxDist) : TransitivePathBase(qec, std::move(child), std::move(leftSide), - std::move(rightSide), minDist, maxDist){}; + std::move(rightSide), minDist, maxDist) {} /** * @brief Compute the transitive hull with a bound side. diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index fa5a07427b..8bd5b1ff20 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -239,6 +239,13 @@ std::vector Union::computePermutation() const { return permutation; } +// _____________________________________________________________________________ +std::optional Union::getOriginalColumn( + bool leftChild, ColumnIndex unionColumn) const { + ColumnIndex column = _columnOrigins.at(unionColumn).at(!leftChild); + return column == NO_COLUMN ? std::nullopt : std::optional{column}; +} + // _____________________________________________________________________________ IdTable Union::transformToCorrectColumnFormat( IdTable idTable, const std::vector& permutation) const { diff --git a/src/engine/Union.h b/src/engine/Union.h index 3d87664380..74e34b6e39 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -61,6 +61,22 @@ class Union : public Operation { return {_subtrees[0].get(), _subtrees[1].get()}; } + // Provide access to the left child of this union. + const std::shared_ptr& leftChild() const { + return _subtrees[0]; + } + + // Provide access to the right child of this union. 
+ const std::shared_ptr& rightChild() const { + return _subtrees[1]; + } + + // Return the original index of the column in the left or right child that the + // respective column of this union maps to. If the index does not map to the + // respective child, std::nullopt is returned. + std::optional getOriginalColumn(bool leftChild, + ColumnIndex unionColumn) const; + private: std::unique_ptr cloneImpl() const override; diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index a5e9650d0c..c6af638180 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -3050,3 +3050,190 @@ TEST(QueryPlanner, UnconnectedComponentsInGraphClause) { h::CartesianProductJoin(h::IndexScanFromStrings("?s1", "?p1", "?o1"), h::IndexScanFromStrings("?s2", "?p2", "?o2"))); } + +// _____________________________________________________________________________ +TEST(QueryPlanner, testDistributiveJoinInUnion) { + auto* qec = ad_utility::testing::getQec(); + TransitivePathSide left1{std::nullopt, 0, + Variable("?_QLever_internal_variable_qp_0"), 0}; + TransitivePathSide left2{std::nullopt, 0, + Variable("?_QLever_internal_variable_qp_7"), 0}; + TransitivePathSide right{std::nullopt, 1, Variable("?type"), 1}; + std::string query = + "SELECT * WHERE {\n" + " /(*|*) | /(*|*) ?type .\n" + "}"; + + h::expectWithGivenBudgets( + std::move(query), + h::Union( + h::Union( + h::TransitivePath( + left1, right, 0, std::numeric_limits::max(), + h::IndexScanFromStrings("", "", + "?_QLever_internal_variable_qp_0"), + h::IndexScanFromStrings("?_QLever_internal_variable_qp_2", + "", + "?_QLever_internal_variable_qp_3")), + h::TransitivePath( + left1, right, 0, std::numeric_limits::max(), + h::IndexScanFromStrings("", "", + "?_QLever_internal_variable_qp_0"), + h::IndexScanFromStrings("?_QLever_internal_variable_qp_4", + "", + "?_QLever_internal_variable_qp_5"))), + h::Union( + h::TransitivePath( + left2, right, 0, std::numeric_limits::max(), + h::IndexScanFromStrings("", "", + 
"?_QLever_internal_variable_qp_7"), + h::IndexScanFromStrings("?_QLever_internal_variable_qp_9", + "", + "?_QLever_internal_variable_qp_10")), + h::TransitivePath( + left2, right, 0, std::numeric_limits::max(), + h::IndexScanFromStrings("", "", + "?_QLever_internal_variable_qp_7"), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_11", "", + "?_QLever_internal_variable_qp_12")))), + qec, {4, 16, 64'000'000}); + + TransitivePathSide left3{std::nullopt, 0, Variable("?s"), 0}; + TransitivePathSide right2{std::nullopt, 1, Variable("?y"), 1}; + + h::expectWithGivenBudgets( + "SELECT * WHERE { ?s ?o . { ?s + ?y } UNION { VALUES ?x { 1 " + "} }}", + h::Union( + h::TransitivePath(left3, right2, 1, + std::numeric_limits::max(), + h::IndexScanFromStrings("?s", "", "?o"), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_0", "", + "?_QLever_internal_variable_qp_1")), + h::CartesianProductJoin(h::IndexScanFromStrings("?s", "", "?o"), + h::ValuesClause("VALUES (?x) { (1) }"))), + qec, {4, 16, 64'000'000}); + + h::expectWithGivenBudgets( + "SELECT * WHERE { { VALUES ?x { 1 } } UNION { ?s + ?y } . " + "?s ?o }", + h::Union( + h::CartesianProductJoin(h::ValuesClause("VALUES (?x) { (1) }"), + h::IndexScanFromStrings("?s", "", "?o")), + h::TransitivePath(std::move(left3), std::move(right2), 1, + std::numeric_limits::max(), + h::IndexScanFromStrings("?s", "", "?o"), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_0", "", + "?_QLever_internal_variable_qp_1"))), + qec, {4, 16, 64'000'000}); +} + +// _____________________________________________________________________________ +TEST(QueryPlanner, ensurePlanningIsSkippedWhenNoTransitivePathIsPresent) { + auto qp = makeQueryPlanner(); + { + auto query = SparqlParser::parseQuery( + "SELECT * WHERE { ?x ?o ." 
+ "{ VALUES ?x { 1 } } UNION { VALUES ?x { 1 } }}"); + auto plans = qp.createExecutionTrees(query); + ASSERT_EQ(plans.size(), 1); + EXPECT_TRUE( + std::dynamic_pointer_cast(plans.at(0)._qet->getRootOperation())); + } + { + auto query = SparqlParser::parseQuery( + "SELECT * WHERE { ?x ?o . " + "{ { VALUES ?x { 1 } } UNION { VALUES ?x { 1 } } } " + "UNION " + "{ { VALUES ?x { 1 } } UNION { VALUES ?x { 1 } } } }"); + auto plans = qp.createExecutionTrees(query); + ASSERT_EQ(plans.size(), 1); + EXPECT_TRUE( + std::dynamic_pointer_cast(plans.at(0)._qet->getRootOperation())); + } +} + +// _____________________________________________________________________________ +TEST(QueryPlanner, ensurePlanningIsSkippedWhenTransitivePathIsAlreadyBound) { + auto qp = makeQueryPlanner(); + auto query = SparqlParser::parseQuery( + "SELECT * { { VALUES ?x { 1 } } UNION { ?s + 1 } . ?s ?o }"); + auto plans = qp.createExecutionTrees(query); + ASSERT_EQ(plans.size(), 1); + EXPECT_TRUE( + std::dynamic_pointer_cast(plans.at(0)._qet->getRootOperation())); +} + +// _____________________________________________________________________________ +TEST(QueryPlanner, testDistributiveJoinInUnionRecursive) { + auto* qec = ad_utility::testing::getQec( + " . . . ." + " . . . ." + " . 
."); + TransitivePathSide left1{std::nullopt, 2, + Variable("?_QLever_internal_variable_qp_0"), 0}; + TransitivePathSide left2{std::nullopt, 0, + Variable("?_QLever_internal_variable_qp_4"), 0}; + TransitivePathSide left3{std::nullopt, 0, + Variable("?_QLever_internal_variable_qp_13"), 0}; + TransitivePathSide right1{std::nullopt, 1, Variable("?type"), 1}; + TransitivePathSide right2{std::nullopt, 1, + Variable("?_QLever_internal_variable_qp_3"), 1}; + TransitivePathSide right3{std::nullopt, 1, + Variable("?_QLever_internal_variable_qp_12"), 1}; + std::string query = + "SELECT * WHERE {\n" + " " + " /((/(*|*))*|(/(*|*))*)" + " ?type .\n" + "}"; + + h::expectWithGivenBudgets( + std::move(query), + h::Union(h::TransitivePath( + left1, right1, 0, std::numeric_limits::max(), + h::IndexScanFromStrings("", "", + "?_QLever_internal_variable_qp_0"), + h::Sort(h::Union( + h::TransitivePath( + left2, right2, 0, std::numeric_limits::max(), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_2", "", + "?_QLever_internal_variable_qp_4"), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_6", "", + "?_QLever_internal_variable_qp_7")), + h::TransitivePath( + left2, right2, 0, std::numeric_limits::max(), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_2", "", + "?_QLever_internal_variable_qp_4"), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_8", "", + "?_QLever_internal_variable_qp_9"))))), + h::TransitivePath( + left1, right1, 0, std::numeric_limits::max(), + h::IndexScanFromStrings("", "", + "?_QLever_internal_variable_qp_0"), + h::Sort(h::Union( + h::TransitivePath( + left3, right3, 0, std::numeric_limits::max(), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_11", "", + "?_QLever_internal_variable_qp_13"), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_15", "", + "?_QLever_internal_variable_qp_16")), + h::TransitivePath( + left3, right3, 0, std::numeric_limits::max(), + h::IndexScanFromStrings( + 
"?_QLever_internal_variable_qp_11", "", + "?_QLever_internal_variable_qp_13"), + h::IndexScanFromStrings( + "?_QLever_internal_variable_qp_17", "", + "?_QLever_internal_variable_qp_18")))))), + qec, {4, 16, 64'000'000}); +} diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 7b03a8d0ef..87842cc8ea 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -482,6 +482,16 @@ void expectWithGivenBudget(std::string query, auto matcher, EXPECT_THAT(qet, matcher); } +// Same as `expectWithGivenBudget` but allows multiple budgets to be tested. +void expectWithGivenBudgets(std::string query, auto matcher, + std::optional optQec, + std::vector queryPlanningBudgets, + source_location l = source_location::current()) { + for (size_t budget : queryPlanningBudgets) { + expectWithGivenBudget(query, matcher, optQec, budget, l); + } +} + // Same as `expectWithGivenBudget` above, but always use the greedy query // planner. void expectGreedy(std::string query, auto matcher, @@ -505,13 +515,7 @@ void expectDynamicProgramming( void expect(std::string query, auto matcher, std::optional optQec = std::nullopt, source_location l = source_location::current()) { - auto e = [&](size_t budget) { - expectWithGivenBudget(query, matcher, optQec, budget, l); - }; - e(0); - e(1); - e(4); - e(16); - e(64'000'000); + expectWithGivenBudgets(std::move(query), std::move(matcher), + std::move(optQec), {0, 1, 4, 16, 64'000'000}, l); } } // namespace queryPlannerTestHelpers