Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[E2E Test][Improvement] Add Test Case for LocalFile CSV Source #8448

Open
wants to merge 5 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,11 @@ public class LocalFileIT extends TestSuiteBase {
Path txtLzo = convertToLzoFile(ContainerUtil.getResourcesFile("/text/e2e.txt"));
ContainerUtil.copyFileIntoContainers(
txtLzo, "/seatunnel/read/lzo_text/e2e.txt", container);
ContainerUtil.copyFileIntoContainers(
"/excel/e2e.csv",
"/seatunnel/read/csv/name=tyrantlucifer/hobby=coding/e2e.csv",
container);

ContainerUtil.copyFileIntoContainers(
"/excel/e2e.xlsx",
"/seatunnel/read/excel/name=tyrantlucifer/hobby=coding/e2e.xlsx",
Expand Down Expand Up @@ -295,9 +300,17 @@ public class LocalFileIT extends TestSuiteBase {
};

@TestTemplate
@DisabledOnContainer(
value = {TestContainerId.SPARK_2_4},
type = {EngineType.SPARK, EngineType.FLINK},
disabledReason =
"Fink test is multi-node, LocalFile connector will use different containers for obtaining files")
public void testLocalFileReadAndWrite(TestContainer container)
throws IOException, InterruptedException {
TestHelper helper = new TestHelper(container);
helper.execute("/excel/local_csv_to_assert.conf");
helper.execute("/excel/local_csv_to_assert_with_multipletable.conf");
helper.execute("/excel/fake_to_local_csv.conf");
helper.execute("/excel/fake_to_local_excel.conf");
helper.execute("/excel/local_excel_to_assert.conf");
helper.execute("/excel/local_excel_projection_to_assert.conf");
Expand Down Expand Up @@ -391,7 +404,7 @@ public void testLocalFileReadAndWrite(TestContainer container)
@TestTemplate
@DisabledOnContainer(
value = {TestContainerId.SPARK_2_4},
type = {EngineType.FLINK},
type = {EngineType.SPARK, EngineType.FLINK},
disabledReason =
"Fink test is multi-node, LocalFile connector will use different containers for obtaining files")
public void testLocalFileReadAndWriteWithSaveMode(TestContainer container)
Expand Down Expand Up @@ -437,6 +450,11 @@ private List<String> getFileListFromContainer(String path) {
}

@TestTemplate
@DisabledOnContainer(
value = {TestContainerId.SPARK_2_4},
type = {EngineType.SPARK, EngineType.FLINK},
disabledReason =
"Fink test is multi-node, LocalFile connector will use different containers for obtaining files")
public void testLocalFileCatalog(TestContainer container)
throws IOException, InterruptedException {
final LocalFileCatalog localFileCatalog =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
c_map c_array c_string c_boolean c_tinyint c_smallint c_int c_bigint c_float c_double c_bytes c_date c_decimal c_timestamp c_row
{"OQBqH":"wTKAH","rkvlO":"KXStv","pCMEX":"CyJKx","DAgdj":"SMbQe","dsJag":"jyFsb"} [111,222,333,444,555] rDAya true 25 22478 1333226130 3261 3.26072 3.2606 [77, 116, 89, 118, 119] 2023-06-13 3.26072348493343 2023-05-17 00:36:12 "gfBji":"emeXF","BCStD":"cUZwy","XyMkz":"CWBGW","nTHbQ":"Oaaos","IwjTs":"zyGjf"};[1939637044,1066960875,2141621606,1638720300,1269572120];YjilG;false;11;26418;1110174340;4728476106429842432;2.0147008E38;8.338846635117556E307;UxtrR;2023-12-18;76273205606379580629.661783233489743766;2023-03-04 07:08:58
{"OQBqH":"wTKAH","rkvlO":"KXStv","pCMEX":"CyJKx","DAgdj":"SMbQe","dsJag":"jyFsb"} [111,222,333,444,555] rDAya true 22 22472 1333226132 3262 3.26072 3.2602 [77, 116, 89, 118, 112] 2023-06-12 3.26072348493342 2023-05-17 00:36:12 "gfBji":"emeXF","BCStD":"cUZwy","XyMkz":"CWBGW","nTHbQ":"Oaaos","IwjTs":"zyGjf"};[1939637044,1066960875,2141621606,1638720300,1269572120];YjilG;false;11;26418;1110174340;4728476106429842432;2.0147008E38;8.338846635117556E307;UxtrR;2023-12-18;76273205606379580629.661783233489743766;2023-03-04 07:08:58
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Job-level settings for the job that writes generated rows to a local CSV file.
env {
# Single-threaded batch run.
parallelism = 1
job.mode = "BATCH"

# You can set spark configuration here
# NOTE(review): these spark.* keys presumably only take effect on the Spark engine — confirm.
spark.app.name = "SeaTunnel"
spark.executor.instances = 2
spark.executor.cores = 1
spark.executor.memory = "1g"
spark.master = local
}

# Source side of the CSV write job: a FakeSource whose schema declares 14 typed
# columns plus c_row, a nested row that repeats the same 14 columns.
source {
FakeSource {
# Name under which this source's output is published.
plugin_output = "fake"
schema = {
fields {
# Top-level columns. NOTE(review): declaration order presumably defines column order — keep as is.
c_map = "map<string, string>"
c_array = "array<int>"
c_string = string
c_boolean = boolean
c_tinyint = tinyint
c_smallint = smallint
c_int = int
c_bigint = bigint
c_float = float
c_double = double
c_bytes = bytes
c_date = date
c_decimal = "decimal(38, 18)"
c_timestamp = timestamp
# Nested row mirroring the top-level columns above.
c_row = {
c_map = "map<string, string>"
c_array = "array<int>"
c_string = string
c_boolean = boolean
c_tinyint = tinyint
c_smallint = smallint
c_int = int
c_bigint = bigint
c_float = float
c_double = double
c_bytes = bytes
c_date = date
c_decimal = "decimal(38, 18)"
c_timestamp = timestamp
}
}
}
}
}

# Sink side of the CSV write job: LocalFile configured with file_format_type "csv".
sink {
LocalFile {
# Output directory for the written files.
path = "/tmp/seatunnel/csv"
# NOTE(review): no partition_by is set in this block, so how ${k0}=${v0} resolves is not visible here — confirm.
partition_dir_expression = "${k0}=${v0}"
is_partition_field_write_in_file = true
# File name is built from the transaction id and the current time, formatted with filename_time_format below.
file_name_expression = "${transactionId}_${now}"
file_format_type = "csv"
filename_time_format = "yyyy.MM.dd"
is_enable_transaction = true
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Job-level settings for the job that reads the CSV fixture and asserts on it.
env {
  # Single-threaded batch run. job.mode is declared exactly once: a repeated
  # key would only override the earlier, identical value.
  parallelism = 1
  job.mode = "BATCH"

  # You can set spark configuration here
  spark.app.name = "SeaTunnel"
  spark.executor.instances = 2
  spark.executor.cores = 1
  spark.executor.memory = "1g"
  spark.master = local
}

# Source side of the CSV read job: LocalFile reading CSV from /seatunnel/read/csv.
# The integration test copies /excel/e2e.csv into a name=.../hobby=... subdirectory
# of this path before the job runs.
source {
  LocalFile {
    path = "/seatunnel/read/csv"
    # Name under which this source's output is published.
    plugin_output = "fake"
    file_format_type = csv
    # Tab-separated columns, newline-separated rows.
    field_delimiter = "\t"
    row_delimiter = "\n"
    # The fixture's first line is a column-name header, so skip one row.
    skip_header_row_number = 1
    schema = {
      # Table name referenced by the Assert sink's table-names.
      table = "fake01"
      fields {
        # Top-level columns; every entry is newline-separated with no trailing
        # comma, matching the sibling job configs.
        c_map = "map<string, string>"
        c_array = "array<int>"
        c_string = string
        c_boolean = boolean
        c_tinyint = tinyint
        c_smallint = smallint
        c_int = int
        c_bigint = bigint
        c_float = float
        c_double = double
        c_bytes = bytes
        c_date = date
        c_decimal = "decimal(38, 18)"
        c_timestamp = timestamp
        # Nested row mirroring the top-level columns above.
        c_row = {
          c_map = "map<string, string>"
          c_array = "array<int>"
          c_string = string
          c_boolean = boolean
          c_tinyint = tinyint
          c_smallint = smallint
          c_int = int
          c_bigint = bigint
          c_float = float
          c_double = double
          c_bytes = bytes
          c_date = date
          c_decimal = "decimal(38, 18)"
          c_timestamp = timestamp
        }
      }
    }
  }
}

# Sink side of the CSV read job: Assert rules applied to table "fake01".
sink {
Assert {
rules {
row_rules = [
{
# Upper bound on the row count.
# NOTE(review): only MAX_ROW is declared, so a run that reads zero rows would presumably still pass — consider adding a MIN_ROW rule; confirm against the fixture's row count.
rule_type = MAX_ROW
rule_value = 2
}
],
# Must match schema.table declared by the source.
table-names = ["fake01"]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Job-level settings for the multi-table CSV read job.
env {
# Single-threaded batch run.
parallelism = 1
job.mode = "BATCH"
# You can set spark configuration here
# NOTE(review): these spark.* keys presumably only take effect on the Spark engine — confirm.
spark.app.name = "SeaTunnel"
spark.executor.instances = 2
spark.executor.cores = 1
spark.executor.memory = "1g"
spark.master = local
}

# Source side of the multi-table CSV read job: one LocalFile source declaring two
# table configs ("fake01" and "fake02"). Both entries use the same path, delimiters,
# header skip and schema fields; only schema.table differs.
source {
LocalFile {
tables_configs = [
{
# First table: "fake01".
path = "/seatunnel/read/csv"
file_format_type = csv
# Tab-separated columns, newline-separated rows.
field_delimiter = "\t"
row_delimiter = "\n"
# The fixture's first line is a column-name header, so skip one row.
skip_header_row_number = 1
schema = {
table = "fake01"
fields {
c_map = "map<string, string>"
c_array = "array<int>"
c_string = string
c_boolean = boolean
c_tinyint = tinyint
c_smallint = smallint
c_int = int
c_bigint = bigint
c_float = float
c_double = double
c_bytes = bytes
c_date = date
c_decimal = "decimal(38, 18)"
c_timestamp = timestamp
# Nested row mirroring the top-level columns above.
c_row = {
c_map = "map<string, string>"
c_array = "array<int>"
c_string = string
c_boolean = boolean
c_tinyint = tinyint
c_smallint = smallint
c_int = int
c_bigint = bigint
c_float = float
c_double = double
c_bytes = bytes
c_date = date
c_decimal = "decimal(38, 18)"
c_timestamp = timestamp
}
}
}
},
{
# Second table: "fake02", reading the same path as "fake01".
path = "/seatunnel/read/csv"
file_format_type = csv
field_delimiter = "\t"
row_delimiter = "\n"
skip_header_row_number = 1
schema = {
table = "fake02"
fields {
c_map = "map<string, string>"
c_array = "array<int>"
c_string = string
c_boolean = boolean
c_tinyint = tinyint
c_smallint = smallint
c_int = int
c_bigint = bigint
c_float = float
c_double = double
c_bytes = bytes
c_date = date
c_decimal = "decimal(38, 18)"
c_timestamp = timestamp
# Nested row mirroring the top-level columns above.
c_row = {
c_map = "map<string, string>"
c_array = "array<int>"
c_string = string
c_boolean = boolean
c_tinyint = tinyint
c_smallint = smallint
c_int = int
c_bigint = bigint
c_float = float
c_double = double
c_bytes = bytes
c_date = date
c_decimal = "decimal(38, 18)"
c_timestamp = timestamp
}
}
}
}
]
# Name under which this source's output is published.
plugin_output = "fake"
}
}

# Sink side of the multi-table CSV read job: Assert listing both source tables.
sink {
Assert {
rules {
# Must match the schema.table values declared by the source's tables_configs.
# NOTE(review): no row_rules or field_rules are declared in this block, so only the table names are referenced — confirm that is the intended coverage.
table-names = ["fake01", "fake02"]
}
}
}
Loading