Skip to content

Commit

Permalink
Add support for from_utc_timestamp in Hive->Trino translation (#109)
Browse files Browse the repository at this point in the history
  • Loading branch information
losipiuk authored Aug 10, 2021
1 parent c2e5a67 commit b311a87
Showing 4 changed files with 108 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -5,6 +5,10 @@
*/
package com.linkedin.coral.trino.rel2trino;

import java.math.BigDecimal;
import java.util.List;
import java.util.Optional;

import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelShuttle;
import org.apache.calcite.rel.RelShuttleImpl;
@@ -22,6 +26,8 @@
import org.apache.calcite.rel.logical.LogicalSort;
import org.apache.calcite.rel.logical.LogicalUnion;
import org.apache.calcite.rel.logical.LogicalValues;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
@@ -36,6 +42,11 @@
import com.linkedin.coral.hive.hive2rel.functions.GenericProjectFunction;
import com.linkedin.coral.trino.rel2trino.functions.GenericProjectToTrinoConverter;

import static com.linkedin.coral.trino.rel2trino.UDFMapUtils.createUDF;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.MULTIPLY;
import static org.apache.calcite.sql.type.ReturnTypes.explicit;
import static org.apache.calcite.sql.type.SqlTypeName.*;


public class Calcite2TrinoUDFConverter {
private Calcite2TrinoUDFConverter() {
@@ -125,7 +136,7 @@ public RelNode visit(RelNode other) {
}

private TrinoRexConverter getTrinoRexConverter(RelNode node) {
return new TrinoRexConverter(node.getCluster().getRexBuilder());
return new TrinoRexConverter(node.getCluster().getRexBuilder(), node.getCluster().getTypeFactory());
}
};
return calciteNode.accept(converter);
@@ -136,6 +147,7 @@ private TrinoRexConverter getTrinoRexConverter(RelNode node) {
*/
public static class TrinoRexConverter extends RexShuttle {
private final RexBuilder rexBuilder;
private final RelDataTypeFactory typeFactory;

// SUPPORTED_TYPE_CAST_MAP is a static mapping that maps a SqlTypeFamily key to its set of
// type-castable SqlTypeFamilies.
@@ -145,8 +157,9 @@ public static class TrinoRexConverter extends RexShuttle {
.putAll(SqlTypeFamily.CHARACTER, SqlTypeFamily.NUMERIC, SqlTypeFamily.BOOLEAN).build();
}

public TrinoRexConverter(RexBuilder rexBuilder) {
public TrinoRexConverter(RexBuilder rexBuilder, RelDataTypeFactory typeFactory) {
this.rexBuilder = rexBuilder;
this.typeFactory = typeFactory;
}

@Override
@@ -160,6 +173,13 @@ public RexNode visitCall(RexCall call) {
return GenericProjectToTrinoConverter.convertGenericProject(rexBuilder, call);
}

if (call.getOperator().getName().equals("from_utc_timestamp")) {
Optional<RexNode> modifiedCall = visitFromUtcTimestampCall(call);
if (modifiedCall.isPresent()) {
return modifiedCall.get();
}
}

final UDFTransformer transformer =
CalciteTrinoUDFMap.getUDFTransformer(call.getOperator().getName(), call.operands.size());
if (transformer != null) {
@@ -169,6 +189,61 @@ public RexNode visitCall(RexCall call) {
return super.visitCall(modifiedCall);
}

private Optional<RexNode> visitFromUtcTimestampCall(RexCall call) {
RelDataType inputType = call.getOperands().get(0).getType();
// TODO(trinodb/trino#6295) support high-precision timestamp
RelDataType targetType = typeFactory.createSqlType(TIMESTAMP, 3);

List<RexNode> convertedOperands = visitList(call.getOperands(), (boolean[]) null);
RexNode sourceValue = convertedOperands.get(0);
RexNode timezone = convertedOperands.get(1);

// In below definitions we should use `TIMESTATMP WITH TIME ZONE`. As calcite is lacking
// this type we use `TIMESTAMP` instead. It does not have any practical implications as result syntax tree
// is not type-checked, and only used for generating output SQL for a view query.
SqlOperator trinoAtTimeZone = createUDF("at_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoWithTimeZone = createUDF("with_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoToUnixTime = createUDF("to_unixtime", explicit(DOUBLE));
SqlOperator trinoFromUnixtimeNanos =
createUDF("from_unixtime_nanos", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoFromUnixTime = createUDF("from_unixtime", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoCanonicalizeHiveTimezoneId = createUDF("$canonicalize_hive_timezone_id", explicit(VARCHAR));

RelDataType bigintType = typeFactory.createSqlType(BIGINT);
RelDataType doubleType = typeFactory.createSqlType(DOUBLE);

if (inputType.getSqlTypeName() == BIGINT || inputType.getSqlTypeName() == INTEGER
|| inputType.getSqlTypeName() == SMALLINT || inputType.getSqlTypeName() == TINYINT) {

return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixtimeNanos,
rexBuilder.makeCall(MULTIPLY, rexBuilder.makeCast(bigintType, sourceValue),
rexBuilder.makeBigintLiteral(BigDecimal.valueOf(1000000)))),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

if (inputType.getSqlTypeName() == DOUBLE || inputType.getSqlTypeName() == FLOAT
|| inputType.getSqlTypeName() == DECIMAL) {

return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixTime, rexBuilder.makeCast(doubleType, sourceValue)),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

if (inputType.getSqlTypeName() == TIMESTAMP || inputType.getSqlTypeName() == DATE) {
return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixTime,
rexBuilder.makeCall(trinoToUnixTime,
rexBuilder.makeCall(trinoWithTimeZone, sourceValue, rexBuilder.makeLiteral("UTC")))),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

return Optional.empty();
}

// [CORAL-18] Hive is inconsistent and allows everything to be string so we make
// this adjustment for common case. Ideally, we should use Rel collation to
// check if this is HiveCollation before applying such rules but we don't use
Original file line number Diff line number Diff line change
@@ -138,6 +138,19 @@ public Object[][] viewTestCasesProvider() {
+ "\"if\"(\"REGEXP_LIKE\"('rocks', '^[^\\\"]*$'), CAST(\"json_extract\"(\"$cor0\".\"b\".\"b1\", '$[\"' || 'rocks' || '\"]') AS VARCHAR(65535)), NULL) AS \"f\"\n"
+ "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"" },

{ "test", "get_json_object_view", "SELECT \"json_extract\"(\"b\".\"b1\", '$.name')\nFROM \"test\".\"tablea\"" } };
{ "test", "get_json_object_view", "SELECT \"json_extract\"(\"b\".\"b1\", '$.name')\nFROM \"test\".\"tablea\"" },

{ "test", "view_from_utc_timestamp", "SELECT "
+ "CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"a_float\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"a_double\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"a_decimal_zero\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), "
+ "CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n"
+ "FROM \"test\".\"table_from_utc_timestamp\"" }, };
}
}
Original file line number Diff line number Diff line change
@@ -288,6 +288,22 @@ public static void initializeViews(Path metastoreDbDirectory) throws HiveExcepti
+ "SELECT `t`.`a`, `jt`.`d`, `jt`.`e`, `jt`.`f` FROM `test`.`tableA` AS `t` LATERAL VIEW json_tuple(`t`.`b`.`b1`, \"trino\", \"always\", \"rocks\") `jt` AS `d`, `e`, `f`");
run(driver, "CREATE VIEW IF NOT EXISTS test.get_json_object_view AS \n"
+ "SELECT get_json_object(b.b1, '$.name') FROM test.tableA");

run(driver,
"CREATE TABLE test.table_from_utc_timestamp (a_tinyint tinyint, a_smallint smallint, "
+ "a_integer int, a_bigint bigint, a_float float, a_double double, "
+ "a_decimal_three decimal(10,3), a_decimal_zero decimal(10,0), a_timestamp timestamp, " + "a_date date)");
run(driver,
"CREATE VIEW test.view_from_utc_timestamp AS SELECT from_utc_timestamp(a_tinyint, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_smallint, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_integer, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_bigint, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_float, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_double, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_decimal_three, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_decimal_zero, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_timestamp, 'America/Los_Angeles'), "
+ "from_utc_timestamp(a_date, 'America/Los_Angeles')" + "FROM test.table_from_utc_timestamp");
}

public static RelNode convertView(String db, String view) {
2 changes: 1 addition & 1 deletion version.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Version of the produced binaries.
# The version is inferred by shipkit-auto-version Gradle plugin (https://github.com/shipkit/shipkit-auto-version)
version=1.0.*
version=1.0.*

0 comments on commit b311a87

Please sign in to comment.