Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Olap to S3 data transfer fix #13753

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2113,6 +2113,125 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
}
}

Y_UNIT_TEST(TestOlapToS3Insert) {
    // Verifies that INSERT ... SELECT scripts reading from a column-store table and
    // writing into S3-backed external tables finish with EExecStatus::Completed.
    // Four shapes are covered: plain / partitioned-by-year target, each with and
    // without LIMIT 1 on the source. Nothing in this test writes rows into the
    // column-store table, so only successful completion is checked, not the output.
    const TString root = "/Root/";
    const TString source = "source";
    const TString table1 = "table1";
    const TString table2 = "table2";
    const TString bucket = "bucket";
    const TString olapTable = "DestinationOlap";

    CreateBucket(bucket);

    auto kikimr = NTestUtils::MakeKikimrRunner();

    auto tableClient = kikimr->GetTableClient();
    auto session = tableClient.CreateSession().GetValueSync().GetSession();

    // Schema: one external data source, two external tables on top of it
    // (table2 additionally partitioned by `year`), and the column-store table
    // that the INSERT statements below read from.
    const TString ddl = fmt::format(R"(
CREATE EXTERNAL DATA SOURCE `{source}` WITH (
SOURCE_TYPE="ObjectStorage",
LOCATION="{location}",
AUTH_METHOD="NONE"
);
CREATE EXTERNAL TABLE `{table1}` (
key Int64 NOT NULL,
value String NOT NULL,
) WITH (
DATA_SOURCE="{source}",
LOCATION="/{location_table1}/",
FORMAT="csv_with_names"
);
CREATE EXTERNAL TABLE `{table2}` (
key Int64 NOT NULL,
value String NOT NULL,
year String NOT NULL
) WITH (
DATA_SOURCE="{source}",
LOCATION="/{location_table2}/",
FORMAT="csv_with_names",
PARTITIONED_BY="['year']"
);
CREATE TABLE `{olap_table}` (
key Int64 NOT NULL,
value String NOT NULL,
PRIMARY KEY (key)
)
WITH (STORE = COLUMN);)",
        "location"_a = GetBucketLocation(bucket),
        "source"_a = root + source,
        "table1"_a = root + table1,
        "table2"_a = root + table2,
        "location_table1"_a = table1,
        "location_table2"_a = table2,
        "olap_table"_a = olapTable
    );
    auto result = session.ExecuteSchemeQuery(ddl).GetValueSync();
    UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString());

    auto queryClient = kikimr->GetQueryClient();

    // Submits `sql` as a script, checks that it was accepted and got an execution id,
    // then waits for the operation and requires it to end up in the Completed state.
    const auto runScriptToCompletion = [&](const TString& sql) {
        auto scriptExecutionOperation = queryClient.ExecuteScript(sql).ExtractValueSync();
        UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString());
        UNIT_ASSERT(scriptExecutionOperation.Metadata().ExecutionId);

        NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver());
        UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString());
    };

    // Non-partitioned target, whole source table.
    runScriptToCompletion(fmt::format(R"(
INSERT INTO {destination}
SELECT key, value FROM {source};)",
        "destination"_a = table1,
        "source"_a = olapTable));

    // Non-partitioned target, source restricted with LIMIT.
    runScriptToCompletion(fmt::format(R"(
INSERT INTO {destination}
SELECT key, value FROM {source} LIMIT 1;)",
        "destination"_a = table1,
        "source"_a = olapTable));

    // Partitioned target: the partition column is supplied as a literal.
    runScriptToCompletion(fmt::format(R"(
INSERT INTO {destination}
SELECT key, value, "2024" AS year FROM {source};)",
        "destination"_a = table2,
        "source"_a = olapTable));

    // Partitioned target, source restricted with LIMIT.
    runScriptToCompletion(fmt::format(R"(
INSERT INTO {destination}
SELECT key, value, "2024" AS year FROM {source} LIMIT 1;)",
        "destination"_a = table2,
        "source"_a = olapTable));
}
}

} // namespace NKikimr::NKqp
32 changes: 22 additions & 10 deletions ydb/library/yql/providers/s3/provider/yql_s3_phy_opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,29 @@ class TS3PhysicalOptProposalTransformer : public TOptimizeTransformerBase {

if (!FindNode(input.Ptr(), [] (const TExprNode::TPtr& node) { return node->IsCallable(TCoDataSource::CallableName()); })) {
YQL_CLOG(INFO, ProviderS3) << "Rewrite pure S3WriteObject `" << cluster << "`.`" << target.Path().StringValue() << "` as stage with sink.";
auto shouldBePassedAsInput = FindNode(input.Ptr(), [] (const TExprNode::TPtr& node) { return node->IsCallable(TDqStage::CallableName()); });

auto stageInputs = Build<TExprList>(ctx, writePos);
auto toFlow = Build<TCoToFlow>(ctx, writePos);
TVector<TCoArgument> args;

if (shouldBePassedAsInput) {
auto arg = Build<TCoArgument>(ctx, writePos).Name("in").Done();
stageInputs.Add(input);
args.push_back(arg);
toFlow.Input(arg);
}
else {
toFlow.Input(input);
}

return keys.empty() ?
Build<TDqStage>(ctx, writePos)
.Inputs().Build()
.Inputs(stageInputs.Done())
.Program<TCoLambda>()
.Args({})
.Args(args)
.Body<TS3SinkOutput>()
.Input<TCoToFlow>()
.Input(input)
.Build()
.Input(toFlow.Done())
.Format(target.Format())
.KeyColumns().Build()
.Settings(sinkOutputSettingsBuilder.Done())
Expand Down Expand Up @@ -237,12 +251,10 @@ class TS3PhysicalOptProposalTransformer : public TOptimizeTransformerBase {
.Add<TDqCnHashShuffle>()
.Output<TDqOutput>()
.Stage<TDqStage>()
.Inputs().Build()
.Inputs(stageInputs.Done())
.Program<TCoLambda>()
.Args({})
.Body<TCoToFlow>()
.Input(input)
.Build()
.Args(args)
.Body(toFlow.Done())
.Build()
.Settings().Build()
.Build()
Expand Down
Loading