diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java index 2d2d74bd3..cfb6ea50f 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java @@ -5,6 +5,10 @@ */ package com.linkedin.coral.trino.rel2trino; +import java.math.BigDecimal; +import java.util.List; +import java.util.Optional; + import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.RelShuttleImpl; @@ -22,6 +26,8 @@ import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.logical.LogicalUnion; import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; @@ -36,6 +42,11 @@ import com.linkedin.coral.hive.hive2rel.functions.GenericProjectFunction; import com.linkedin.coral.trino.rel2trino.functions.GenericProjectToTrinoConverter; +import static com.linkedin.coral.trino.rel2trino.UDFMapUtils.createUDF; +import static org.apache.calcite.sql.fun.SqlStdOperatorTable.MULTIPLY; +import static org.apache.calcite.sql.type.ReturnTypes.explicit; +import static org.apache.calcite.sql.type.SqlTypeName.*; + public class Calcite2TrinoUDFConverter { private Calcite2TrinoUDFConverter() { @@ -125,7 +136,7 @@ public RelNode visit(RelNode other) { } private TrinoRexConverter getTrinoRexConverter(RelNode node) { - return new TrinoRexConverter(node.getCluster().getRexBuilder()); + return new TrinoRexConverter(node.getCluster().getRexBuilder(), node.getCluster().getTypeFactory()); } }; return calciteNode.accept(converter); @@ -136,6 +147,7 @@ private TrinoRexConverter getTrinoRexConverter(RelNode node) { */ public static class TrinoRexConverter extends RexShuttle { private final RexBuilder rexBuilder; + private final RelDataTypeFactory typeFactory; // SUPPORTED_TYPE_CAST_MAP is a static mapping that maps a SqlTypeFamily key to its set of // type-castable SqlTypeFamilies. @@ -145,8 +157,9 @@ public static class TrinoRexConverter extends RexShuttle { .putAll(SqlTypeFamily.CHARACTER, SqlTypeFamily.NUMERIC, SqlTypeFamily.BOOLEAN).build(); } - public TrinoRexConverter(RexBuilder rexBuilder) { + public TrinoRexConverter(RexBuilder rexBuilder, RelDataTypeFactory typeFactory) { this.rexBuilder = rexBuilder; + this.typeFactory = typeFactory; } @Override @@ -160,6 +173,13 @@ public RexNode visitCall(RexCall call) { return GenericProjectToTrinoConverter.convertGenericProject(rexBuilder, call); } + if (call.getOperator().getName().equals("from_utc_timestamp")) { + Optional modifiedCall = visitFromUtcTimestampCall(call); + if (modifiedCall.isPresent()) { + return modifiedCall.get(); + } + } + final UDFTransformer transformer = CalciteTrinoUDFMap.getUDFTransformer(call.getOperator().getName(), call.operands.size()); if (transformer != null) { @@ -169,6 +189,61 @@ public RexNode visitCall(RexCall call) { return super.visitCall(modifiedCall); } + private Optional visitFromUtcTimestampCall(RexCall call) { + RelDataType inputType = call.getOperands().get(0).getType(); + // TODO(trinodb/trino#6295) support high-precision timestamp + RelDataType targetType = typeFactory.createSqlType(TIMESTAMP, 3); + + List convertedOperands = visitList(call.getOperands(), (boolean[]) null); + RexNode sourceValue = convertedOperands.get(0); + RexNode timezone = convertedOperands.get(1); + + // In below definitions we should use `TIMESTATMP WITH TIME ZONE`. As calcite is lacking + // this type we use `TIMESTAMP` instead. It does not have any practical implications as result syntax tree + // is not type-checked, and only used for generating output SQL for a view query. + SqlOperator trinoAtTimeZone = createUDF("at_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */)); + SqlOperator trinoWithTimeZone = createUDF("with_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */)); + SqlOperator trinoToUnixTime = createUDF("to_unixtime", explicit(DOUBLE)); + SqlOperator trinoFromUnixtimeNanos = + createUDF("from_unixtime_nanos", explicit(TIMESTAMP /* should be WITH TIME ZONE */)); + SqlOperator trinoFromUnixTime = createUDF("from_unixtime", explicit(TIMESTAMP /* should be WITH TIME ZONE */)); + SqlOperator trinoCanonicalizeHiveTimezoneId = createUDF("$canonicalize_hive_timezone_id", explicit(VARCHAR)); + + RelDataType bigintType = typeFactory.createSqlType(BIGINT); + RelDataType doubleType = typeFactory.createSqlType(DOUBLE); + + if (inputType.getSqlTypeName() == BIGINT || inputType.getSqlTypeName() == INTEGER + || inputType.getSqlTypeName() == SMALLINT || inputType.getSqlTypeName() == TINYINT) { + + return Optional.of(rexBuilder.makeCast(targetType, + rexBuilder.makeCall(trinoAtTimeZone, + rexBuilder.makeCall(trinoFromUnixtimeNanos, + rexBuilder.makeCall(MULTIPLY, rexBuilder.makeCast(bigintType, sourceValue), + rexBuilder.makeBigintLiteral(BigDecimal.valueOf(1000000)))), + rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone)))); + } + + if (inputType.getSqlTypeName() == DOUBLE || inputType.getSqlTypeName() == FLOAT + || inputType.getSqlTypeName() == DECIMAL) { + + return Optional.of(rexBuilder.makeCast(targetType, + rexBuilder.makeCall(trinoAtTimeZone, + rexBuilder.makeCall(trinoFromUnixTime, rexBuilder.makeCast(doubleType, sourceValue)), + rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone)))); + } + + if (inputType.getSqlTypeName() == TIMESTAMP || inputType.getSqlTypeName() == DATE) { + return Optional.of(rexBuilder.makeCast(targetType, + rexBuilder.makeCall(trinoAtTimeZone, + rexBuilder.makeCall(trinoFromUnixTime, + rexBuilder.makeCall(trinoToUnixTime, + rexBuilder.makeCall(trinoWithTimeZone, sourceValue, rexBuilder.makeLiteral("UTC")))), + rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone)))); + } + + return Optional.empty(); + } + // [CORAL-18] Hive is inconsistent and allows everything to be string so we make // this adjustment for common case. Ideally, we should use Rel collation to // check if this is HiveCollation before applying such rules but we don't use diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java index 2de5a7a12..f66035df4 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java @@ -138,6 +138,19 @@ public Object[][] viewTestCasesProvider() { + "\"if\"(\"REGEXP_LIKE\"('rocks', '^[^\\\"]*$'), CAST(\"json_extract\"(\"$cor0\".\"b\".\"b1\", '$[\"' || 'rocks' || '\"]') AS VARCHAR(65535)), NULL) AS \"f\"\n" + "FROM (VALUES (0)) AS \"t\" (\"ZERO\")) AS \"t0\"" }, - { "test", "get_json_object_view", "SELECT \"json_extract\"(\"b\".\"b1\", '$.name')\nFROM \"test\".\"tablea\"" } }; + { "test", "get_json_object_view", "SELECT \"json_extract\"(\"b\".\"b1\", '$.name')\nFROM \"test\".\"tablea\"" }, + + { "test", "view_from_utc_timestamp", "SELECT " + + "CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"a_float\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"a_double\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"a_decimal_zero\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), " + + "CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n" + + "FROM \"test\".\"table_from_utc_timestamp\"" }, }; } } diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java index 9250f0492..352aa2050 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java @@ -288,6 +288,22 @@ public static void initializeViews(Path metastoreDbDirectory) throws HiveExcepti + "SELECT `t`.`a`, `jt`.`d`, `jt`.`e`, `jt`.`f` FROM `test`.`tableA` AS `t` LATERAL VIEW json_tuple(`t`.`b`.`b1`, \"trino\", \"always\", \"rocks\") `jt` AS `d`, `e`, `f`"); run(driver, "CREATE VIEW IF NOT EXISTS test.get_json_object_view AS \n" + "SELECT get_json_object(b.b1, '$.name') FROM test.tableA"); + + run(driver, + "CREATE TABLE test.table_from_utc_timestamp (a_tinyint tinyint, a_smallint smallint, " + + "a_integer int, a_bigint bigint, a_float float, a_double double, " + + "a_decimal_three decimal(10,3), a_decimal_zero decimal(10,0), a_timestamp timestamp, " + "a_date date)"); + run(driver, + "CREATE VIEW test.view_from_utc_timestamp AS SELECT from_utc_timestamp(a_tinyint, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_smallint, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_integer, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_bigint, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_float, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_double, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_decimal_three, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_decimal_zero, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_timestamp, 'America/Los_Angeles'), " + + "from_utc_timestamp(a_date, 'America/Los_Angeles')" + "FROM test.table_from_utc_timestamp"); } public static RelNode convertView(String db, String view) { diff --git a/version.properties b/version.properties index 7876fce23..aa625747a 100644 --- a/version.properties +++ b/version.properties @@ -1,3 +1,3 @@ # Version of the produced binaries. # The version is inferred by shipkit-auto-version Gradle plugin (https://github.com/shipkit/shipkit-auto-version) -version=1.0.* \ No newline at end of file +version=1.0.*