Skip to content

Commit

Permalink
Creates stubbed percentile aggregate functions (#9)
Browse files Browse the repository at this point in the history
Creates stubbed percentile aggregate functions

Includes PERCENTILE_1, PERCENTILE_5, PERCENTILE_25,
PERCENTILE_75, PERCENTILE_95, and PERCENTILE_99 aggregate
functions as well as a MEDIAN function, which is functionally
equivalent to PERCENTILE_50. The underlying implementation is
stubbed and simply returns the percentile value (e.g.
PERCENTILE_25 returns 25, PERCENTILE_75 returns 75, etc).

A t-digest based implementation will come soon. This commit
is to unblock teams choosing to begin integrating the new
aggregate functions into their codebases.
  • Loading branch information
rkennedy-mode authored Mar 30, 2020
1 parent c8b075c commit b1eb4ba
Show file tree
Hide file tree
Showing 15 changed files with 1,000 additions and 12 deletions.
75 changes: 75 additions & 0 deletions src/ee/common/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,51 @@ string expressionToString(ExpressionType type)
case EXPRESSION_TYPE_AGGREGATE_COMPACT_TO_CARDINALITY: {
return "AGGREGATE_COMPACT_TO_CARDINALITY";
}
case EXPRESSION_TYPE_AGGREGATE_VALUES_TO_TDIGEST: {
return "AGGREGATE_VALUES_TO_TDIGEST";
}
case EXPRESSION_TYPE_AGGREGATE_MEDIAN: {
return "AGGREGATE_MEDIAN";
}
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_MEDIAN: {
return "AGGREGATE_TDIGEST_TO_MEDIAN";
}
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_1: {
return "AGGREGATE_PERCENTILE_1";
}
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_1: {
return "AGGREGATE_TDIGEST_TO_PERCENTILE_1";
}
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_5: {
return "AGGREGATE_PERCENTILE_5";
}
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_5: {
return "AGGREGATE_TDIGEST_TO_PERCENTILE_5";
}
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_25: {
return "AGGREGATE_PERCENTILE_25";
}
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_25: {
return "AGGREGATE_TDIGEST_TO_PERCENTILE_25";
}
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_75: {
return "AGGREGATE_PERCENTILE_75";
}
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_75: {
return "AGGREGATE_TDIGEST_TO_PERCENTILE_75";
}
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_95: {
return "AGGREGATE_PERCENTILE_95";
}
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_95: {
return "AGGREGATE_TDIGEST_TO_PERCENTILE_95";
}
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_99: {
return "AGGREGATE_PERCENTILE_99";
}
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_99: {
return "AGGREGATE_TDIGEST_TO_PERCENTILE_99";
}
case EXPRESSION_TYPE_AGGREGATE_WINDOWED_RANK: {
return "EXPRESSION_TYPE_AGGREGATE_WINDOWED_RANK";
}
Expand Down Expand Up @@ -724,6 +769,36 @@ ExpressionType stringToExpression(string str )
return EXPRESSION_TYPE_AGGREGATE_VALUES_TO_COMPACT;
} else if (str == "AGGREGATE_COMPACT_TO_CARDINALITY") {
return EXPRESSION_TYPE_AGGREGATE_COMPACT_TO_CARDINALITY;
} else if (str == "AGGREGATE_VALUES_TO_TDIGEST") {
return EXPRESSION_TYPE_AGGREGATE_VALUES_TO_TDIGEST;
} else if (str == "AGGREGATE_MEDIAN") {
return EXPRESSION_TYPE_AGGREGATE_MEDIAN;
} else if (str == "AGGREGATE_TDIGEST_TO_MEDIAN") {
return EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_MEDIAN;
} else if (str == "AGGREGATE_PERCENTILE_1") {
return EXPRESSION_TYPE_AGGREGATE_PERCENTILE_1;
} else if (str == "AGGREGATE_TDIGEST_TO_PERCENTILE_1") {
return EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_1;
} else if (str == "AGGREGATE_PERCENTILE_5") {
return EXPRESSION_TYPE_AGGREGATE_PERCENTILE_5;
} else if (str == "AGGREGATE_TDIGEST_TO_PERCENTILE_5") {
return EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_5;
} else if (str == "AGGREGATE_PERCENTILE_25") {
return EXPRESSION_TYPE_AGGREGATE_PERCENTILE_25;
} else if (str == "AGGREGATE_TDIGEST_TO_PERCENTILE_25") {
return EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_25;
} else if (str == "AGGREGATE_PERCENTILE_75") {
return EXPRESSION_TYPE_AGGREGATE_PERCENTILE_75;
} else if (str == "AGGREGATE_TDIGEST_TO_PERCENTILE_75") {
return EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_75;
} else if (str == "AGGREGATE_PERCENTILE_95") {
return EXPRESSION_TYPE_AGGREGATE_PERCENTILE_95;
} else if (str == "AGGREGATE_TDIGEST_TO_PERCENTILE_95") {
return EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_95;
} else if (str == "AGGREGATE_PERCENTILE_99") {
return EXPRESSION_TYPE_AGGREGATE_PERCENTILE_99;
} else if (str == "AGGREGATE_TDIGEST_TO_PERCENTILE_99") {
return EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_99;
} else if (str == "AGGREGATE_WINDOWED_RANK") {
return EXPRESSION_TYPE_AGGREGATE_WINDOWED_RANK;
} else if (str == "AGGREGATE_WINDOWED_DENSE_RANK") {
Expand Down
39 changes: 27 additions & 12 deletions src/ee/common/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,18 +304,33 @@ enum ExpressionType {
// -----------------------------
// Aggregates
// -----------------------------
EXPRESSION_TYPE_AGGREGATE_COUNT = 40,
EXPRESSION_TYPE_AGGREGATE_COUNT_STAR = 41,
EXPRESSION_TYPE_AGGREGATE_SUM = 42,
EXPRESSION_TYPE_AGGREGATE_MIN = 43,
EXPRESSION_TYPE_AGGREGATE_MAX = 44,
EXPRESSION_TYPE_AGGREGATE_AVG = 45,
EXPRESSION_TYPE_AGGREGATE_APPROX_COUNT_DISTINCT = 46,
EXPRESSION_TYPE_AGGREGATE_VALS_TO_HYPERLOGLOG = 47,
EXPRESSION_TYPE_AGGREGATE_HYPERLOGLOGS_TO_CARD = 48,
EXPRESSION_TYPE_AGGREGATE_COMPACT_COUNT_DISTINCT = 49,
EXPRESSION_TYPE_AGGREGATE_VALUES_TO_COMPACT = 50,
EXPRESSION_TYPE_AGGREGATE_COMPACT_TO_CARDINALITY = 51,
EXPRESSION_TYPE_AGGREGATE_COUNT = 40,
EXPRESSION_TYPE_AGGREGATE_COUNT_STAR = 41,
EXPRESSION_TYPE_AGGREGATE_SUM = 42,
EXPRESSION_TYPE_AGGREGATE_MIN = 43,
EXPRESSION_TYPE_AGGREGATE_MAX = 44,
EXPRESSION_TYPE_AGGREGATE_AVG = 45,
EXPRESSION_TYPE_AGGREGATE_APPROX_COUNT_DISTINCT = 46,
EXPRESSION_TYPE_AGGREGATE_VALS_TO_HYPERLOGLOG = 47,
EXPRESSION_TYPE_AGGREGATE_HYPERLOGLOGS_TO_CARD = 48,
EXPRESSION_TYPE_AGGREGATE_COMPACT_COUNT_DISTINCT = 49,
EXPRESSION_TYPE_AGGREGATE_VALUES_TO_COMPACT = 50,
EXPRESSION_TYPE_AGGREGATE_COMPACT_TO_CARDINALITY = 51,
EXPRESSION_TYPE_AGGREGATE_VALUES_TO_TDIGEST = 52,
EXPRESSION_TYPE_AGGREGATE_MEDIAN = 53,
EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_MEDIAN = 54,
EXPRESSION_TYPE_AGGREGATE_PERCENTILE_1 = 55,
EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_1 = 56,
EXPRESSION_TYPE_AGGREGATE_PERCENTILE_5 = 57,
EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_5 = 58,
EXPRESSION_TYPE_AGGREGATE_PERCENTILE_25 = 59,
EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_25 = 60,
EXPRESSION_TYPE_AGGREGATE_PERCENTILE_75 = 61,
EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_75 = 62,
EXPRESSION_TYPE_AGGREGATE_PERCENTILE_95 = 63,
EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_95 = 64,
EXPRESSION_TYPE_AGGREGATE_PERCENTILE_99 = 65,
EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_99 = 66,

// -----------------------------
// Windowed Expression Aggregates.
Expand Down
87 changes: 87 additions & 0 deletions src/ee/executors/aggregateexecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,63 @@ class CompactToCardinalityAgg : public CompactCountDistinctAgg {
}
};

/// Single partition aggregate.
///
/// Stub implementation backing the MEDIAN / PERCENTILE_N aggregates: the
/// values fed to advance() are ignored and finalize() simply reports the
/// percentile the instance was constructed with (an instance built with 25
/// finalizes to the double 25).
class PercentileAgg : public Agg {
    /// Percentile handed to the constructor; the only state the stub keeps.
    double m_percentile;

public:
    /// @param percentile value that finalize() will report.
    ///
    /// Declared explicit so that a bare number can never be implicitly
    /// converted into an aggregate object.
    explicit PercentileAgg(double percentile) : m_percentile(percentile)
    {
    }

    /// Consumes one input value. Currently a no-op: the stub does not
    /// accumulate anything.
    virtual void advance(const NValue& val)
    {
        // TODO: do the thing
    }

    /// @param type requested result type; not consulted by the stub.
    /// @return the configured percentile wrapped as a double NValue.
    virtual NValue finalize(ValueType type)
    {
        // TODO: do the thing
        return ValueFactory::getDoubleValue(m_percentile);
    }

    /// Delegates to the base class; the stub has no extra state to clear
    /// (m_percentile is configuration, not accumulated data).
    virtual void resetAgg()
    {
        Agg::resetAgg();
    }
};

/// Push-down (multi partition) aggregate.
///
/// Stub implementation: input values are ignored and finalize() produces a
/// zero-length VARBINARY value standing in for the serialized digest.
class ValuesToTDigestAgg : public Agg {
public:
    /// Consumes one input value. Currently a no-op.
    virtual void advance(const NValue& val)
    {
        // TODO: do the thing
    }

    /// @param type requested result type; asserted to be VALUE_TYPE_VARBINARY.
    /// @return a temp binary NValue built from an empty (0 byte) payload.
    virtual NValue finalize(ValueType type)
    {
        // TODO: do the thing
        assert (type == VALUE_TYPE_VARBINARY);
        // The stub has nothing to serialize. Use automatic storage instead of
        // `new char[0]`: a zero-length new[] still performs an allocation, and
        // the original never delete[]d it, leaking on every finalize() call.
        // NOTE(review): assumes getTempBinaryValue copies the bytes it is
        // handed rather than retaining the pointer -- confirm against
        // ValueFactory before the real serialization lands.
        char serializedBytes[1] = { 0 };
        int byteSize = 0;
        return ValueFactory::getTempBinaryValue(serializedBytes, byteSize);
    }
};

/// Pull-up (multi partition) aggregate.
///
/// Shares PercentileAgg's finalize(), so it reports the percentile it was
/// constructed with; only advance() is overridden, and it is still a stub
/// that discards its input.
class TDigestToPercentileAgg : public PercentileAgg {
public:
    /// @param percentile forwarded unchanged to the PercentileAgg base.
    TDigestToPercentileAgg(double percentile)
        : PercentileAgg(percentile)
    {}

    /// Consumes one input value. Currently a no-op.
    virtual void advance(const NValue& val) {
        // TODO: do the thing
    }
};

/*
* Create an instance of an aggregator for the specified aggregate type and "distinct" flag.
* The object is allocated from the provided memory pool.
Expand Down Expand Up @@ -602,6 +659,36 @@ inline Agg* getAggInstance(Pool& memoryPool, ExpressionType agg_type, bool isDis
return new (memoryPool) ValuesToCompactAgg();
case EXPRESSION_TYPE_AGGREGATE_COMPACT_TO_CARDINALITY:
return new (memoryPool) CompactToCardinalityAgg();
case EXPRESSION_TYPE_AGGREGATE_VALUES_TO_TDIGEST:
return new (memoryPool) ValuesToTDigestAgg();
case EXPRESSION_TYPE_AGGREGATE_MEDIAN:
return new (memoryPool) PercentileAgg(50);
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_MEDIAN:
return new (memoryPool) TDigestToPercentileAgg(50);
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_1:
return new (memoryPool) PercentileAgg(1);
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_1:
return new (memoryPool) TDigestToPercentileAgg(1);
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_5:
return new (memoryPool) PercentileAgg(5);
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_5:
return new (memoryPool) TDigestToPercentileAgg(5);
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_25:
return new (memoryPool) PercentileAgg(25);
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_25:
return new (memoryPool) TDigestToPercentileAgg(25);
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_75:
return new (memoryPool) PercentileAgg(75);
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_75:
return new (memoryPool) TDigestToPercentileAgg(75);
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_95:
return new (memoryPool) PercentileAgg(95);
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_95:
return new (memoryPool) TDigestToPercentileAgg(95);
case EXPRESSION_TYPE_AGGREGATE_PERCENTILE_99:
return new (memoryPool) PercentileAgg(99);
case EXPRESSION_TYPE_AGGREGATE_TDIGEST_TO_PERCENTILE_99:
return new (memoryPool) TDigestToPercentileAgg(99);
default:
{
char message[128];
Expand Down
21 changes: 21 additions & 0 deletions src/frontend/org/voltdb/expressions/AggregateExpression.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,32 @@ public static void finalizeAggregateValueTypes(AbstractExpression expr)
expr.m_valueType = VoltType.BIGINT;
expr.m_valueSize = expr.m_valueType.getLengthInBytesForFixedTypes();
break;
case AGGREGATE_MEDIAN:
case AGGREGATE_PERCENTILE_1:
case AGGREGATE_PERCENTILE_5:
case AGGREGATE_PERCENTILE_25:
case AGGREGATE_PERCENTILE_75:
case AGGREGATE_PERCENTILE_95:
case AGGREGATE_PERCENTILE_99:
case AGGREGATE_TDIGEST_TO_MEDIAN:
case AGGREGATE_TDIGEST_TO_PERCENTILE_1:
case AGGREGATE_TDIGEST_TO_PERCENTILE_5:
case AGGREGATE_TDIGEST_TO_PERCENTILE_25:
case AGGREGATE_TDIGEST_TO_PERCENTILE_75:
case AGGREGATE_TDIGEST_TO_PERCENTILE_95:
case AGGREGATE_TDIGEST_TO_PERCENTILE_99:
//
// Always a float (double)
//
expr.m_valueType = VoltType.FLOAT;
expr.m_valueSize = expr.m_valueType.getLengthInBytesForFixedTypes();
break;
case AGGREGATE_VALS_TO_HYPERLOGLOG:
expr.m_valueType = VoltType.VARBINARY;
expr.m_valueSize = 65537;
break;
case AGGREGATE_VALUES_TO_COMPACT:
case AGGREGATE_VALUES_TO_TDIGEST:
expr.m_valueType = VoltType.VARBINARY;
expr.m_valueSize = 1024 * 1024 * 8; // AbstractExpression enforces 10_000_000
break;
Expand Down
41 changes: 41 additions & 0 deletions src/frontend/org/voltdb/planner/PlanAssembler.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
Expand Down Expand Up @@ -113,6 +115,15 @@ public ParsedResultAccumulator(boolean orderIsDeterministic,
}
}

/**
 * Unmodifiable set of the user-facing percentile aggregate types:
 * MEDIAN and PERCENTILE_1/5/25/75/95/99. The AGGREGATE_VALUES_TO_TDIGEST
 * and AGGREGATE_TDIGEST_TO_* types are deliberately not members.
 * Used for membership tests when planning aggregates, e.g. to find the
 * aggregates on the distributed node that get rewritten to
 * AGGREGATE_VALUES_TO_TDIGEST.
 */
private static final Set<ExpressionType> PERCENTILE_EXPRESSIONS = Collections.unmodifiableSet(EnumSet.of(
ExpressionType.AGGREGATE_MEDIAN,
ExpressionType.AGGREGATE_PERCENTILE_1,
ExpressionType.AGGREGATE_PERCENTILE_5,
ExpressionType.AGGREGATE_PERCENTILE_25,
ExpressionType.AGGREGATE_PERCENTILE_75,
ExpressionType.AGGREGATE_PERCENTILE_95,
ExpressionType.AGGREGATE_PERCENTILE_99));

/** convenience pointer to the database object in the catalog */
private final Database m_catalogDb;

Expand Down Expand Up @@ -2744,6 +2755,7 @@ else if (gbInfo.isChangedToPartialAggregate()) {
*/
else if (agg_expression_type != ExpressionType.AGGREGATE_MIN &&
agg_expression_type != ExpressionType.AGGREGATE_MAX &&
!PERCENTILE_EXPRESSIONS.contains(agg_expression_type) &&
agg_expression_type != ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT &&
agg_expression_type != ExpressionType.AGGREGATE_COMPACT_COUNT_DISTINCT) {
/*
Expand Down Expand Up @@ -3001,6 +3013,7 @@ private static void fixDistributedApproxCountDistinct(
List<ExpressionType> distAggTypes = distNode.getAggregateTypes();
boolean hasApproxCountDistinct = false;
boolean hasCompactCountDistinct = false;
boolean hasPercentile = false;
for (int i = 0; i < distAggTypes.size(); ++i) {
ExpressionType et = distAggTypes.get(i);
if (et == ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) {
Expand All @@ -3012,6 +3025,11 @@ private static void fixDistributedApproxCountDistinct(
hasCompactCountDistinct = true;
distNode.updateAggregate(i, ExpressionType.AGGREGATE_VALUES_TO_COMPACT);
}

if (PERCENTILE_EXPRESSIONS.contains(et)) {
hasPercentile = true;
distNode.updateAggregate(i, ExpressionType.AGGREGATE_VALUES_TO_TDIGEST);
}
}

if (hasApproxCountDistinct) {
Expand All @@ -3035,6 +3053,29 @@ private static void fixDistributedApproxCountDistinct(
}
}
}

if (hasPercentile) {
// Now, patch up any MEDIAN/PERCENTILE_X on the coordinating node.
List<ExpressionType> coordAggTypes = coordNode.getAggregateTypes();
for (int i = 0; i < coordAggTypes.size(); ++i) {
ExpressionType et = coordAggTypes.get(i);
if (et == ExpressionType.AGGREGATE_MEDIAN) {
coordNode.updateAggregate(i, ExpressionType.AGGREGATE_TDIGEST_TO_MEDIAN);
} else if (et == ExpressionType.AGGREGATE_PERCENTILE_1) {
coordNode.updateAggregate(i, ExpressionType.AGGREGATE_TDIGEST_TO_PERCENTILE_1);
} else if (et == ExpressionType.AGGREGATE_PERCENTILE_5) {
coordNode.updateAggregate(i, ExpressionType.AGGREGATE_TDIGEST_TO_PERCENTILE_5);
} else if (et == ExpressionType.AGGREGATE_PERCENTILE_25) {
coordNode.updateAggregate(i, ExpressionType.AGGREGATE_TDIGEST_TO_PERCENTILE_25);
} else if (et == ExpressionType.AGGREGATE_PERCENTILE_75) {
coordNode.updateAggregate(i, ExpressionType.AGGREGATE_TDIGEST_TO_PERCENTILE_75);
} else if (et == ExpressionType.AGGREGATE_PERCENTILE_95) {
coordNode.updateAggregate(i, ExpressionType.AGGREGATE_TDIGEST_TO_PERCENTILE_95);
} else if (et == ExpressionType.AGGREGATE_PERCENTILE_99) {
coordNode.updateAggregate(i, ExpressionType.AGGREGATE_TDIGEST_TO_PERCENTILE_99);
}
}
}
}

/**
Expand Down
15 changes: 15 additions & 0 deletions src/frontend/org/voltdb/types/ExpressionType.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,21 @@ public enum ExpressionType {
AGGREGATE_COMPACT_COUNT_DISTINCT (AggregateExpression.class, 49, "COMPACT_COUNT_DISTINCT", true),
AGGREGATE_VALUES_TO_COMPACT (AggregateExpression.class, 50, "VALUES_TO_COMPACT"),
AGGREGATE_COMPACT_TO_CARDINALITY (AggregateExpression.class, 51, "COMPACT_TO_CARDINALITY"),
AGGREGATE_VALUES_TO_TDIGEST (AggregateExpression.class, 52, "VALUES_TO_TDIGEST"),
AGGREGATE_MEDIAN (AggregateExpression.class, 53, "MEDIAN"),
AGGREGATE_TDIGEST_TO_MEDIAN (AggregateExpression.class, 54, "TDIGEST_TO_MEDIAN"),
AGGREGATE_PERCENTILE_1 (AggregateExpression.class, 55, "PERCENTILE_1"),
AGGREGATE_TDIGEST_TO_PERCENTILE_1 (AggregateExpression.class, 56, "TDIGEST_TO_PERCENTILE_1"),
AGGREGATE_PERCENTILE_5 (AggregateExpression.class, 57, "PERCENTILE_5"),
AGGREGATE_TDIGEST_TO_PERCENTILE_5 (AggregateExpression.class, 58, "TDIGEST_TO_PERCENTILE_5"),
AGGREGATE_PERCENTILE_25 (AggregateExpression.class, 59, "PERCENTILE_25"),
AGGREGATE_TDIGEST_TO_PERCENTILE_25 (AggregateExpression.class, 60, "TDIGEST_TO_PERCENTILE_25"),
AGGREGATE_PERCENTILE_75 (AggregateExpression.class, 61, "PERCENTILE_75"),
AGGREGATE_TDIGEST_TO_PERCENTILE_75 (AggregateExpression.class, 62, "TDIGEST_TO_PERCENTILE_75"),
AGGREGATE_PERCENTILE_95 (AggregateExpression.class, 63, "PERCENTILE_95"),
AGGREGATE_TDIGEST_TO_PERCENTILE_95 (AggregateExpression.class, 64, "TDIGEST_TO_PERCENTILE_95"),
AGGREGATE_PERCENTILE_99 (AggregateExpression.class, 65, "PERCENTILE_99"),
AGGREGATE_TDIGEST_TO_PERCENTILE_99 (AggregateExpression.class, 66, "TDIGEST_TO_PERCENTILE_99"),
// ----------------------------
// Windowed Aggregates. We need to treat these
// somewhat differently than the non-windowed
Expand Down
Loading

0 comments on commit b1eb4ba

Please sign in to comment.