From 2c45ad24c3543b37b6b5db5438bc46b2ef72f879 Mon Sep 17 00:00:00 2001 From: Reza Karegar Date: Tue, 18 Jul 2023 21:39:19 +0000 Subject: [PATCH 1/6] Create examples for the Bigtable Spark connector in Java, Scala, and Python. --- bigtable/spark-connector-preview/README | 10 ++ .../spark-connector-preview/java-maven/README | 88 ++++++++++++++ .../java-maven/pom.xml | 108 +++++++++++++++++ .../bigtable/spark/example/WordCount.java | 111 ++++++++++++++++++ .../bigtable/spark/example/model/TestRow.java | 85 ++++++++++++++ .../spark-connector-preview/python/README | 74 ++++++++++++ .../python/word_count.py | 68 +++++++++++ .../scala-sbt/README.md | 88 ++++++++++++++ .../scala-sbt/build.sbt | 55 +++++++++ .../scala-sbt/project/assembly.sbt | 17 +++ .../scala-sbt/project/build.properties | 15 +++ .../scala-sbt/project/plugins.sbt | 1 + .../bigtable/spark/example/WordCount.scala | 81 +++++++++++++ 13 files changed, 801 insertions(+) create mode 100644 bigtable/spark-connector-preview/README create mode 100644 bigtable/spark-connector-preview/java-maven/README create mode 100644 bigtable/spark-connector-preview/java-maven/pom.xml create mode 100644 bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/WordCount.java create mode 100644 bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/model/TestRow.java create mode 100644 bigtable/spark-connector-preview/python/README create mode 100644 bigtable/spark-connector-preview/python/word_count.py create mode 100644 bigtable/spark-connector-preview/scala-sbt/README.md create mode 100644 bigtable/spark-connector-preview/scala-sbt/build.sbt create mode 100644 bigtable/spark-connector-preview/scala-sbt/project/assembly.sbt create mode 100644 bigtable/spark-connector-preview/scala-sbt/project/build.properties create mode 100644 bigtable/spark-connector-preview/scala-sbt/project/plugins.sbt create mode 100644 
bigtable/spark-connector-preview/scala-sbt/src/main/scala/bigtable/spark/example/WordCount.scala diff --git a/bigtable/spark-connector-preview/README b/bigtable/spark-connector-preview/README new file mode 100644 index 00000000000..f50464c7d61 --- /dev/null +++ b/bigtable/spark-connector-preview/README @@ -0,0 +1,10 @@ +# Examples for Cloud Bigtable Apache Spark connector Private Preview + +This project contains sample code to use the Bigtable Spark connector in +different languages. You can refer to the following subdirectories +to access the example for each of the languages, as well as commands +needed to run the examples using Dataproc: + +1. `java-maven/` +2. `scala-sbt/` +3. `python/` diff --git a/bigtable/spark-connector-preview/java-maven/README b/bigtable/spark-connector-preview/java-maven/README new file mode 100644 index 00000000000..a58fee9c82e --- /dev/null +++ b/bigtable/spark-connector-preview/java-maven/README @@ -0,0 +1,88 @@ +# Bigtable Spark Example Using Java and Maven + +This example uses Java and Maven for package management to write data +to a Bigtable table and read it back. + +## Compiling the project + +To compile the code, you can run +the following command (after installing Maven) from inside the current +directory: + +``` +mvn clean install +``` + +The target JAR will be located under +`target/bigtable-spark-example-0.0.1-SNAPSHOT.jar`. + +## Running the example using Dataproc + +To submit the JAR to Dataproc, you will need a Bigtable project and +instance ID, as well as a Bigtable table name, which will be the three required +arguments. By default, a new table is created by the application, but you can +provide an optional fourth argument `false` for `createNewTable` (assuming +that you have already created a table with the column family `example_family`). 
+ +To run the JAR using dataproc, you can run the following command: + +``` +gcloud dataproc jobs submit spark \ +--cluster=$BIGTABLE_SPARK_DATAPROC_CLUSTER \ +--region=$BIGTABLE_SPARK_DATAPROC_REGION \ +--class=bigtable.spark.example.WordCount \ +--jars=target/bigtable-spark-example-0.0.1-SNAPSHOT.jar \ +-- \ +$BIGTABLE_SPARK_PROJECT_ID \ +$BIGTABLE_SPARK_INSTANCE_ID \ +$BIGTABLE_SPARK_TABLE_NAME +``` + +## Expected output + +The following text should be shown in the output of the Spark job. + +``` +Reading the DataFrame from Bigtable: ++-----+-----+ +|count| word| ++-----+-----+ +| 0|word0| +| 1|word1| +| 2|word2| +| 3|word3| +| 4|word4| +| 5|word5| +| 6|word6| +| 7|word7| +| 8|word8| +| 9|word9| ++-----+-----+ +``` + + +To verify that the data has been written to Bigtable, you can run the following +command (requires [cbt CLI](https://cloud.google.com/bigtable/docs/cbt-overview)): + +``` +cbt -project=$BIGTABLE_SPARK_PROJECT_ID -instance=$BIGTABLE_SPARK_INSTANCE_ID \ +read $BIGTABLE_SPARK_TABLE_NAME +``` + +With this expected output: +``` +---------------------------------------- +word0 + example_family:countCol @ 2023/07/11-16:05:59.596000 + "\x00\x00\x00\x00" + +---------------------------------------- +word1 + example_family:countCol @ 2023/07/11-16:05:59.611000 + "\x00\x00\x00\x01" + +---------------------------------------- +. +. +. 
+``` diff --git a/bigtable/spark-connector-preview/java-maven/pom.xml b/bigtable/spark-connector-preview/java-maven/pom.xml new file mode 100644 index 00000000000..6eac71e957c --- /dev/null +++ b/bigtable/spark-connector-preview/java-maven/pom.xml @@ -0,0 +1,108 @@ + + + + 4.0.0 + + com.google.cloud.bigtable + bigtable-spark-example + jar + 0.0.1-SNAPSHOT + + + 2.12 + 3.1.2 + 1.7.36 + 0.0.1-preview1-SNAPSHOT + 1.8 + 1.8 + + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + + com.google.cloud.bigtable + bigtable-spark + ${bigtable.spark.version} + + + + org.slf4j + slf4j-reload4j + ${reload4j.version} + + + + + + artifact-registry + artifactregistry://us-central1-maven.pkg.dev/cloud-bigtable-ecosystem/bigtable-spark-connector-preview + + true + + + true + + + + + + + + com.google.cloud.artifactregistry + artifactregistry-maven-wagon + 2.2.0 + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.4 + + + package + + shade + + + + + bigtable.spark.example.WordCount + + + + + + + + + \ No newline at end of file diff --git a/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/WordCount.java b/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/WordCount.java new file mode 100644 index 00000000000..a6737860da0 --- /dev/null +++ b/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/WordCount.java @@ -0,0 +1,111 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package bigtable.spark.example; + +import bigtable.spark.example.model.TestRow; + +import java.util.ArrayList; +import java.util.Arrays; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; + +public class WordCount { + private static SparkSession spark; + private static String projectId; + private static String instanceId; + private static String tableName; + private static String createNewTable = "true"; + + private static void parseArguments(String[] args) throws IllegalArgumentException { + if (args.length < 3) { + throw new IllegalArgumentException( + "Arguments Bigtable project ID, instance ID, " + + "and table name must be specified"); + } + projectId = args[0]; + instanceId = args[1]; + tableName = args[2]; + if (args.length > 3) { + createNewTable = args[3]; + } + } + + public static void main(String[] args) throws IllegalArgumentException { + parseArguments(args); + + spark = SparkSession.builder().getOrCreate(); + + Dataset df = createTestDataFrame(); + System.out.println("Created the DataFrame:"); + df.show(); + + String catalog = "{" + + "\"table\":{\"namespace\":\"default\", \"name\":\"" + tableName + "\"," + + "\"tableCoder\":\"PrimitiveType\"}," + + "\"rowkey\":\"wordCol\"," + + "\"columns\":{" + + "\"word\":{\"cf\":\"rowkey\", \"col\":\"wordCol\", \"type\":\"string\"}," + + "\"count\":{\"cf\":\"example_family\", \"col\":\"countCol\", \"type\":\"int\"}" + + "}}".replaceAll("\\s+", ""); + + writeDataframeToBigtable(df, 
catalog, createNewTable); + System.out.println("DataFrame was written to Bigtable."); + + Dataset readDf = readDataframeFromBigtable(catalog); + + System.out.println("Reading the DataFrame from Bigtable:"); + readDf.show(); + } + + private static Dataset createTestDataFrame() { + ArrayList rows = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + rows.add(new TestRow(String.format("word%d", i), i)); + } + Dataset df = spark.createDataset( + rows, + Encoders.bean(TestRow.class)) + .toDF(); + + return df; + } + + private static void writeDataframeToBigtable(Dataset dataframe, String catalog, + String createNewTable) { + dataframe + .write() + .format("bigtable") + .option("catalog", catalog) + .option("bigtable.spark.project.id", projectId) + .option("bigtable.spark.instance.id", instanceId) + .option("bigtable.spark.create.new.table", createNewTable) + .save(); + } + + private static Dataset readDataframeFromBigtable(String catalog) { + Dataset dataframe = spark + .read() + .format("bigtable") + .option("catalog", catalog) + .option("bigtable.spark.project.id", projectId) + .option("bigtable.spark.instance.id", instanceId) + .load(); + return dataframe; + } +} diff --git a/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/model/TestRow.java b/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/model/TestRow.java new file mode 100644 index 00000000000..2e8c2f7ebd1 --- /dev/null +++ b/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/model/TestRow.java @@ -0,0 +1,85 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package bigtable.spark.example.model; + +/** A JavaBean class for using as a DataFrame row in tests. */ +public class TestRow { + private String word; + private int count; + + public TestRow() { + } + + public TestRow( + String word, + int count) { + this.word = word; + this.count = count; + } + + public void setWord(String word) { + this.word = word; + } + + public String getWord() { + return word; + } + + public void setCount(int count) { + this.count = count; + } + + public int getCount() { + return count; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((word == null) ? 
0 : word.hashCode()); + result = prime * result + count; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + TestRow other = (TestRow) obj; + if (word == null) { + if (other.word != null) + return false; + } else if (!word.equals(other.word)) + return false; + if (count != other.count) + return false; + return true; + } + + @Override + public String toString() { + return "TestRow [word=" + + word + + ", count=" + + count + + "]"; + } +} diff --git a/bigtable/spark-connector-preview/python/README b/bigtable/spark-connector-preview/python/README new file mode 100644 index 00000000000..c83f8c6ab57 --- /dev/null +++ b/bigtable/spark-connector-preview/python/README @@ -0,0 +1,74 @@ +# Bigtable Spark Example Using Python + +This example uses Python to write data to a Bigtable table and read it back. + +## Running the example using Dataproc + +To submit the JAR to Dataproc, you will need a Bigtable project and +instance ID, as well as a Bigtable table name, which will be the three required +arguments. By default, a new table is created by the application, but you can +provide an optional fourth argument `false` for `createNewTable` (assuming +that you have already created a table with the column family `example_family`). + +To run the JAR using dataproc, you can run the following command: + +``` +gcloud dataproc jobs submit pyspark \ +--cluster=$BIGTABLE_SPARK_DATAPROC_CLUSTER \ +--region=$BIGTABLE_SPARK_DATAPROC_REGION \ +--jars=gs://bigtable-spark-preview/jars/bigtable-spark-0.0.1-preview1-SNAPSHOT.jar \ +word_count.py \ +-- \ +--bigtableProjectId=$BIGTABLE_SPARK_PROJECT_ID \ +--bigtableInstanceId=$BIGTABLE_SPARK_INSTANCE_ID \ +--bigtableTableName=$BIGTABLE_SPARK_TABLE_NAME +``` + +## Expected output + +The following text should be shown in the output of the Spark job. 
+ +``` +Reading the DataFrame from Bigtable: ++-----+-----+ +|count| word| ++-----+-----+ +| 0|word0| +| 1|word1| +| 2|word2| +| 3|word3| +| 4|word4| +| 5|word5| +| 6|word6| +| 7|word7| +| 8|word8| +| 9|word9| ++-----+-----+ +``` + + +To verify that the data has been written to Bigtable, you can run the following +command (requires [cbt CLI](https://cloud.google.com/bigtable/docs/cbt-overview)): + +``` +cbt -project=$BIGTABLE_SPARK_PROJECT_ID -instance=$BIGTABLE_SPARK_INSTANCE_ID \ +read $BIGTABLE_SPARK_TABLE_NAME +``` + +With this expected output: +``` +---------------------------------------- +word0 + example_family:countCol @ 2023/07/11-16:05:59.596000 + "\x00\x00\x00\x00" + +---------------------------------------- +word1 + example_family:countCol @ 2023/07/11-16:05:59.611000 + "\x00\x00\x00\x01" + +---------------------------------------- +. +. +. +``` diff --git a/bigtable/spark-connector-preview/python/word_count.py b/bigtable/spark-connector-preview/python/word_count.py new file mode 100644 index 00000000000..26afde8a724 --- /dev/null +++ b/bigtable/spark-connector-preview/python/word_count.py @@ -0,0 +1,68 @@ +from pyspark.sql import SparkSession +import argparse + +PROJECT_ID_PROPERTY_NAME = 'bigtableProjectId' +INSTANCE_ID_PROPERTY_NAME = 'bigtableInstanceId' +TABLE_NAME_PROPERTY_NAME = 'bigtableTableName' +CREATE_NEW_TABLE_PROPERTY_NAME = 'createNewTable' + +parser = argparse.ArgumentParser() +parser.add_argument( + '--' + PROJECT_ID_PROPERTY_NAME, help='Bigtable project ID.') +parser.add_argument( + '--' + INSTANCE_ID_PROPERTY_NAME, help='Bigtable instance ID.') +parser.add_argument( + '--' + TABLE_NAME_PROPERTY_NAME, help='Bigtable table name.') +args = vars(parser.parse_args()) # Convert args from Namespace to dict. 
+ +bigtable_project_id = args.get(PROJECT_ID_PROPERTY_NAME) +bigtable_instance_id = args.get(INSTANCE_ID_PROPERTY_NAME) +bigtable_table_name = args.get(TABLE_NAME_PROPERTY_NAME) +create_new_table = args.get(CREATE_NEW_TABLE_PROPERTY_NAME) or 'true' + +if not (bigtable_project_id and + bigtable_instance_id and + bigtable_table_name): + raise ValueError( + f'Bigtable project ID, instance ID, and table id should be specified ' + f'using --{PROJECT_ID_PROPERTY_NAME}=X, --{INSTANCE_ID_PROPERTY_NAME}=Y, ' + f'and --{TABLE_NAME_PROPERTY_NAME}=Z, respectively.' + ) + +bigtable_table_name = 'wordcountexample' + +spark = SparkSession.builder.getOrCreate() + +catalog = ''.join(("""{ + "table":{"namespace":"default", "name":" """ + bigtable_table_name + """ + ", "tableCoder":"PrimitiveType"}, + "rowkey":"wordCol", + "columns":{ + "word":{"cf":"rowkey", "col":"wordCol", "type":"string"}, + "count":{"cf":"example_family", "col":"countCol", "type":"int"} + } + }""").split()) + +data = [{'word': f'word{i}', 'count': i} for i in range(10)] +input_data = spark.createDataFrame(data) +print('Created the DataFrame:') +input_data.show() + +input_data.write \ + .format('bigtable') \ + .options(catalog=catalog) \ + .option('bigtable.spark.project.id', bigtable_project_id) \ + .option('bigtable.spark.instance.id', bigtable_instance_id) \ + .option('bigtable.spark.create.new.table', create_new_table) \ + .save() +print('DataFrame was written to Bigtable.') + +records = spark.read \ + .format('bigtable') \ + .option('bigtable.spark.project.id', bigtable_project_id) \ + .option('bigtable.spark.instance.id', bigtable_instance_id) \ + .options(catalog=catalog) \ + .load() + +print('Reading the DataFrame from Bigtable:') +records.show() diff --git a/bigtable/spark-connector-preview/scala-sbt/README.md b/bigtable/spark-connector-preview/scala-sbt/README.md new file mode 100644 index 00000000000..135b42caa95 --- /dev/null +++ b/bigtable/spark-connector-preview/scala-sbt/README.md @@ -0,0 +1,88 @@ 
+# Bigtable Spark Example Using Scala and sbt + +This example uses Scala and sbt for package management to write data +to a Bigtable table and read it back. + +## Compiling the project + +To compile the code, you can run +the following command (after installing sbt) from inside the current +directory: + +``` +sbt clean assembly +``` + +The target JAR will be located under +`target/scala-2.12/bigtable-spark-example-assembly-0.1.jar`. + +## Running the example using Dataproc + +To submit this Spark job to Dataproc, you will need a Bigtable project and +instance ID, as well as a Bigtable table name, which will be the three required +arguments. By default, a new table is created by the application, but you can +provide an optional fourth argument `false` for `createNewTable` (assuming +that you have already created a table with the column family `example_family`). + +To run the job using dataproc, you can run the following command: + +``` +gcloud dataproc jobs submit spark \ +--cluster=$BIGTABLE_SPARK_DATAPROC_CLUSTER \ +--region=$BIGTABLE_SPARK_DATAPROC_REGION \ +--class=bigtable.spark.example.WordCount \ +--jars=target/scala-2.12/bigtable-spark-example-assembly-0.1.jar \ +-- \ +$BIGTABLE_SPARK_PROJECT_ID \ +$BIGTABLE_SPARK_INSTANCE_ID \ +$BIGTABLE_SPARK_TABLE_NAME +``` + +## Expected output + +The following text should be shown in the output of the Spark job. 
+ +``` +Reading the DataFrame from Bigtable: ++-----+-----+ +|count| word| ++-----+-----+ +| 0|word0| +| 1|word1| +| 2|word2| +| 3|word3| +| 4|word4| +| 5|word5| +| 6|word6| +| 7|word7| +| 8|word8| +| 9|word9| ++-----+-----+ +``` + + +To verify that the data has been written to Bigtable, you can run the following +command (requires [cbt CLI](https://cloud.google.com/bigtable/docs/cbt-overview)): + +``` +cbt -project=$BIGTABLE_SPARK_PROJECT_ID -instance=$BIGTABLE_SPARK_INSTANCE_ID \ +read $BIGTABLE_SPARK_TABLE_NAME +``` + +With this expected output: +``` +---------------------------------------- +word0 + example_family:Count @ 2023/07/11-18:31:51.349000 + "\x00\x00\x00\x00" + +---------------------------------------- +word1 + example_family:Count @ 2023/07/11-18:31:51.385000 + "\x00\x00\x00\x01" + +---------------------------------------- +. +. +. +``` diff --git a/bigtable/spark-connector-preview/scala-sbt/build.sbt b/bigtable/spark-connector-preview/scala-sbt/build.sbt new file mode 100644 index 00000000000..f0bcfa80436 --- /dev/null +++ b/bigtable/spark-connector-preview/scala-sbt/build.sbt @@ -0,0 +1,55 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +name := "bigtable-spark-example" + +version := "0.1" + +scalaVersion := "2.12.10" +val sparkVersion = "3.0.1" + +resolvers += "Private preview artifacts" at "artifactregistry://us-central1-maven.pkg.dev/cloud-bigtable-ecosystem/bigtable-spark-connector-preview" + +libraryDependencies += "com.google.cloud.bigtable" % "bigtable-spark" % "0.0.1-preview1-SNAPSHOT" + +libraryDependencies ++= Seq( + "org.apache.spark" %% "spark-sql" % sparkVersion % Provided, + "org.slf4j" % "slf4j-reload4j" % "1.7.36", +) + +val scalatestVersion = "3.2.6" +libraryDependencies += "org.scalactic" %% "scalactic" % scalatestVersion +libraryDependencies += "org.scalatest" %% "scalatest" % scalatestVersion % "test" +test in assembly := {} + +ThisBuild / assemblyMergeStrategy := { + case PathList("META-INF", "io.netty.versions.properties") => MergeStrategy.first + case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard + case PathList("META-INF", "native", xs @ _*) => MergeStrategy.first + case PathList("META-INF", "native-image", xs @ _*) => MergeStrategy.first + case PathList("mozilla", "public-suffix-list.txt") => MergeStrategy.first + case PathList("google", xs @ _*) => xs match { + case ps @ (x :: xs) if ps.last.endsWith(".proto") => MergeStrategy.first + case _ => MergeStrategy.deduplicate + } + case PathList("javax", xs @ _*) => MergeStrategy.first + case PathList("io", "netty", xs @ _*) => MergeStrategy.first + case PathList(ps @ _*) if ps.last endsWith ".proto" => MergeStrategy.first + case PathList(ps @ _*) if ps.last endsWith "module-info.class" => MergeStrategy.discard + case x => + val oldStrategy = (ThisBuild / assemblyMergeStrategy).value + oldStrategy(x) +} diff --git a/bigtable/spark-connector-preview/scala-sbt/project/assembly.sbt b/bigtable/spark-connector-preview/scala-sbt/project/assembly.sbt new file mode 100644 index 00000000000..dcc3e01e2d9 --- /dev/null +++ b/bigtable/spark-connector-preview/scala-sbt/project/assembly.sbt @@ -0,0 +1,17 @@ +/* + * 
Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1") \ No newline at end of file diff --git a/bigtable/spark-connector-preview/scala-sbt/project/build.properties b/bigtable/spark-connector-preview/scala-sbt/project/build.properties new file mode 100644 index 00000000000..72a672744ef --- /dev/null +++ b/bigtable/spark-connector-preview/scala-sbt/project/build.properties @@ -0,0 +1,15 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +sbt.version = 1.9.1 \ No newline at end of file diff --git a/bigtable/spark-connector-preview/scala-sbt/project/plugins.sbt b/bigtable/spark-connector-preview/scala-sbt/project/plugins.sbt new file mode 100644 index 00000000000..8ac088511d4 --- /dev/null +++ b/bigtable/spark-connector-preview/scala-sbt/project/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.latestbit" % "sbt-gcs-plugin" % "1.8.0") diff --git a/bigtable/spark-connector-preview/scala-sbt/src/main/scala/bigtable/spark/example/WordCount.scala b/bigtable/spark-connector-preview/scala-sbt/src/main/scala/bigtable/spark/example/WordCount.scala new file mode 100644 index 00000000000..7348d7c7aff --- /dev/null +++ b/bigtable/spark-connector-preview/scala-sbt/src/main/scala/bigtable/spark/example/WordCount.scala @@ -0,0 +1,81 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package bigtable.spark.example + +import org.apache.spark.sql.SparkSession + +object WordCount extends App { + val (projectId, instanceId, tableName, createNewTable) = parse(args) + + val spark = SparkSession.builder().getOrCreate() + + val catalog = + s"""{ + |"table":{"namespace":"default", "name":"$tableName", "tableCoder":"PrimitiveType"}, + |"rowkey":"wordCol", + |"columns":{ + | "word":{"cf":"rowkey", "col":"wordCol", "type":"string"}, + | "count":{"cf":"example_family", "col":"countCol", "type":"int"} + |} + |}""".stripMargin + + import spark.implicits._ + val data = (0 to 9).map(i => ("word%d".format(i), i)) + val rdd = spark.sparkContext.parallelize(data) + val df = rdd.toDF("word", "count") + + println("Created the DataFrame:"); + df.show() + + df + .write + .format("bigtable") + .option("catalog", catalog) + .option("bigtable.spark.project.id", projectId) + .option("bigtable.spark.instance.id", instanceId) + .option("bigtable.spark.create.new.table", createNewTable) + .save + println("DataFrame was written to Bigtable.") + + val readDf = spark + .read + .format("bigtable") + .option("catalog", catalog) + .option("bigtable.spark.project.id", projectId) + .option("bigtable.spark.instance.id", instanceId) + .load + + println("Reading the DataFrame from Bigtable:"); + readDf.show() + + def parse(args: Array[String]): (String, String, String, String) = { + import scala.util.Try + val projectId = Try(args(0)).getOrElse { + throw new IllegalArgumentException("Missing command-line argument: BIGTABLE_SPARK_PROJECT_ID") + } + val instanceId = Try(args(1)).getOrElse { + throw new IllegalArgumentException("Missing command-line argument: BIGTABLE_SPARK_INSTANCE_ID") + } + val tableName = Try(args(2)).getOrElse { + throw new IllegalArgumentException("Missing command-line argument: BIGTABLE_SPARK_TABLE_NAME") + } + val createNewTable = Try(args(3)).getOrElse { + "true" + } + (projectId, instanceId, tableName, createNewTable) + } +} + From 
9dddb9bc714b2d73224db0bfcc01d9097a8fe8bb Mon Sep 17 00:00:00 2001 From: Reza Karegar Date: Thu, 27 Jul 2023 11:17:58 -0700 Subject: [PATCH 2/6] Fix overwriting default table name --- bigtable/spark-connector-preview/python/word_count.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bigtable/spark-connector-preview/python/word_count.py b/bigtable/spark-connector-preview/python/word_count.py index 26afde8a724..63bab8a9061 100644 --- a/bigtable/spark-connector-preview/python/word_count.py +++ b/bigtable/spark-connector-preview/python/word_count.py @@ -29,8 +29,6 @@ f'and --{TABLE_NAME_PROPERTY_NAME}=Z, respectively.' ) -bigtable_table_name = 'wordcountexample' - spark = SparkSession.builder.getOrCreate() catalog = ''.join(("""{ From fbdfb0661e1b2c290aed584a1cfd4e7831e16ac0 Mon Sep 17 00:00:00 2001 From: Reza Karegar Date: Thu, 30 Nov 2023 21:27:56 +0000 Subject: [PATCH 3/6] Switch to using the spark-bigtable naming convention. --- bigtable/spark-connector-preview/README | 2 +- .../spark-connector-preview/java-maven/README | 22 +++++++++---------- .../java-maven/pom.xml | 16 +++++++------- .../bigtable}/example/WordCount.java | 14 ++++++------ .../bigtable}/example/model/TestRow.java | 2 +- .../spark-connector-preview/python/README | 18 +++++++-------- .../python/word_count.py | 10 ++++----- .../scala-sbt/README.md | 22 +++++++++---------- .../scala-sbt/build.sbt | 6 ++--- .../bigtable}/example/WordCount.scala | 18 +++++++-------- 10 files changed, 65 insertions(+), 65 deletions(-) rename bigtable/spark-connector-preview/java-maven/src/main/java/{bigtable/spark => spark/bigtable}/example/WordCount.java (89%) rename bigtable/spark-connector-preview/java-maven/src/main/java/{bigtable/spark => spark/bigtable}/example/model/TestRow.java (98%) rename bigtable/spark-connector-preview/scala-sbt/src/main/scala/{bigtable/spark => spark/bigtable}/example/WordCount.scala (84%) diff --git a/bigtable/spark-connector-preview/README b/bigtable/spark-connector-preview/README 
index f50464c7d61..a9546a4bd63 100644 --- a/bigtable/spark-connector-preview/README +++ b/bigtable/spark-connector-preview/README @@ -1,6 +1,6 @@ # Examples for Cloud Bigtable Apache Spark connector Private Preview -This project contains sample code to use the Bigtable Spark connector in +This project contains sample code to use the Spark Bigtable connector in different languages. You can refer to the following subdirectories to access the example for each of the languages, as well as commands needed to run the examples using Dataproc: diff --git a/bigtable/spark-connector-preview/java-maven/README b/bigtable/spark-connector-preview/java-maven/README index a58fee9c82e..15c384ff2b0 100644 --- a/bigtable/spark-connector-preview/java-maven/README +++ b/bigtable/spark-connector-preview/java-maven/README @@ -1,4 +1,4 @@ -# Bigtable Spark Example Using Java and Maven +# Spark Bigtable Example Using Java and Maven This example uses Java and Maven for package management to write data to a Bigtable table and read it back. @@ -14,7 +14,7 @@ mvn clean install ``` The target JAR will be located under -`target/bigtable-spark-example-0.0.1-SNAPSHOT.jar`. +`target/spark-bigtable-example-0.0.1-SNAPSHOT.jar`. 
## Running the example using Dataproc @@ -28,14 +28,14 @@ To run the JAR using dataproc, you can run the following command: ``` gcloud dataproc jobs submit spark \ ---cluster=$BIGTABLE_SPARK_DATAPROC_CLUSTER \ ---region=$BIGTABLE_SPARK_DATAPROC_REGION \ ---class=bigtable.spark.example.WordCount \ ---jars=target/bigtable-spark-example-0.0.1-SNAPSHOT.jar \ +--cluster=$SPARK_BIGTABLE_DATAPROC_CLUSTER \ +--region=$SPARK_BIGTABLE_DATAPROC_REGION \ +--class=spark.bigtable.example.WordCount \ +--jars=target/spark-bigtable-example-0.0.1-SNAPSHOT.jar \ -- \ -$BIGTABLE_SPARK_PROJECT_ID \ -$BIGTABLE_SPARK_INSTANCE_ID \ -$BIGTABLE_SPARK_TABLE_NAME +$SPARK_BIGTABLE_PROJECT_ID \ +$SPARK_BIGTABLE_INSTANCE_ID \ +$SPARK_BIGTABLE_TABLE_NAME ``` ## Expected output @@ -65,8 +65,8 @@ To verify that the data has been written to Bigtable, you can run the following command (requires [cbt CLI](https://cloud.google.com/bigtable/docs/cbt-overview)): ``` -cbt -project=$BIGTABLE_SPARK_PROJECT_ID -instance=$BIGTABLE_SPARK_INSTANCE_ID \ -read $BIGTABLE_SPARK_TABLE_NAME +cbt -project=$SPARK_BIGTABLE_PROJECT_ID -instance=$SPARK_BIGTABLE_INSTANCE_ID \ +read $SPARK_BIGTABLE_TABLE_NAME ``` With this expected output: diff --git a/bigtable/spark-connector-preview/java-maven/pom.xml b/bigtable/spark-connector-preview/java-maven/pom.xml index 6eac71e957c..8ae0f0b6eba 100644 --- a/bigtable/spark-connector-preview/java-maven/pom.xml +++ b/bigtable/spark-connector-preview/java-maven/pom.xml @@ -19,8 +19,8 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - com.google.cloud.bigtable - bigtable-spark-example + com.google.cloud.spark.bigtable + spark-bigtable-example jar 0.0.1-SNAPSHOT @@ -28,7 +28,7 @@ 2.12 3.1.2 1.7.36 - 0.0.1-preview1-SNAPSHOT + 0.0.1-preview2-SNAPSHOT 1.8 1.8 @@ -48,9 +48,9 @@ - com.google.cloud.bigtable - bigtable-spark - ${bigtable.spark.version} + com.google.cloud.spark.bigtable + spark-bigtable + ${spark.bigtable.version} @@ 
-63,7 +63,7 @@ artifact-registry - artifactregistry://us-central1-maven.pkg.dev/cloud-bigtable-ecosystem/bigtable-spark-connector-preview + artifactregistry://us-central1-maven.pkg.dev/cloud-bigtable-ecosystem/spark-bigtable-connector-preview true @@ -96,7 +96,7 @@ - bigtable.spark.example.WordCount + spark.bigtable.example.WordCount diff --git a/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/WordCount.java b/bigtable/spark-connector-preview/java-maven/src/main/java/spark/bigtable/example/WordCount.java similarity index 89% rename from bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/WordCount.java rename to bigtable/spark-connector-preview/java-maven/src/main/java/spark/bigtable/example/WordCount.java index a6737860da0..29a533ead99 100644 --- a/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/WordCount.java +++ b/bigtable/spark-connector-preview/java-maven/src/main/java/spark/bigtable/example/WordCount.java @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package bigtable.spark.example; +package spark.bigtable.example; -import bigtable.spark.example.model.TestRow; +import spark.bigtable.example.model.TestRow; import java.util.ArrayList; import java.util.Arrays; @@ -92,9 +92,9 @@ private static void writeDataframeToBigtable(Dataset dataframe, String cata .write() .format("bigtable") .option("catalog", catalog) - .option("bigtable.spark.project.id", projectId) - .option("bigtable.spark.instance.id", instanceId) - .option("bigtable.spark.create.new.table", createNewTable) + .option("spark.bigtable.project.id", projectId) + .option("spark.bigtable.instance.id", instanceId) + .option("spark.bigtable.create.new.table", createNewTable) .save(); } @@ -103,8 +103,8 @@ private static Dataset readDataframeFromBigtable(String catalog) { .read() .format("bigtable") .option("catalog", catalog) - .option("bigtable.spark.project.id", projectId) - .option("bigtable.spark.instance.id", instanceId) + .option("spark.bigtable.project.id", projectId) + .option("spark.bigtable.instance.id", instanceId) .load(); return dataframe; } diff --git a/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/model/TestRow.java b/bigtable/spark-connector-preview/java-maven/src/main/java/spark/bigtable/example/model/TestRow.java similarity index 98% rename from bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/model/TestRow.java rename to bigtable/spark-connector-preview/java-maven/src/main/java/spark/bigtable/example/model/TestRow.java index 2e8c2f7ebd1..734e9bfe0fc 100644 --- a/bigtable/spark-connector-preview/java-maven/src/main/java/bigtable/spark/example/model/TestRow.java +++ b/bigtable/spark-connector-preview/java-maven/src/main/java/spark/bigtable/example/model/TestRow.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package bigtable.spark.example.model; +package spark.bigtable.example.model; /** A JavaBean class for using as a DataFrame row in tests. */ public class TestRow { diff --git a/bigtable/spark-connector-preview/python/README b/bigtable/spark-connector-preview/python/README index c83f8c6ab57..a41c65ac03f 100644 --- a/bigtable/spark-connector-preview/python/README +++ b/bigtable/spark-connector-preview/python/README @@ -1,4 +1,4 @@ -# Bigtable Spark Example Using Python +# Spark Bigtable Example Using Python This example uses Python to write data to a Bigtable table and read it back. @@ -14,14 +14,14 @@ To run the JAR using dataproc, you can run the following command: ``` gcloud dataproc jobs submit pyspark \ ---cluster=$BIGTABLE_SPARK_DATAPROC_CLUSTER \ ---region=$BIGTABLE_SPARK_DATAPROC_REGION \ ---jars=gs://bigtable-spark-preview/jars/bigtable-spark-0.0.1-preview1-SNAPSHOT.jar \ +--cluster=$SPARK_BIGTABLE_DATAPROC_CLUSTER \ +--region=$SPARK_BIGTABLE_DATAPROC_REGION \ +--jars=gs://spark-bigtable-preview/jars/spark-bigtable-0.0.1-preview2-SNAPSHOT.jar \ word_count.py \ -- \ ---bigtableProjectId=$BIGTABLE_SPARK_PROJECT_ID \ ---bigtableInstanceId=$BIGTABLE_SPARK_INSTANCE_ID \ ---bigtableTableName=$BIGTABLE_SPARK_TABLE_NAME +--bigtableProjectId=$SPARK_BIGTABLE_PROJECT_ID \ +--bigtableInstanceId=$SPARK_BIGTABLE_INSTANCE_ID \ +--bigtableTableName=$SPARK_BIGTABLE_TABLE_NAME ``` ## Expected output @@ -51,8 +51,8 @@ To verify that the data has been written to Bigtable, you can run the following command (requires [cbt CLI](https://cloud.google.com/bigtable/docs/cbt-overview)): ``` -cbt -project=$BIGTABLE_SPARK_PROJECT_ID -instance=$BIGTABLE_SPARK_INSTANCE_ID \ -read $BIGTABLE_SPARK_TABLE_NAME +cbt -project=$SPARK_BIGTABLE_PROJECT_ID -instance=$SPARK_BIGTABLE_INSTANCE_ID \ +read $SPARK_BIGTABLE_TABLE_NAME ``` With this expected output: diff --git a/bigtable/spark-connector-preview/python/word_count.py b/bigtable/spark-connector-preview/python/word_count.py index 
63bab8a9061..73d6faf886d 100644 --- a/bigtable/spark-connector-preview/python/word_count.py +++ b/bigtable/spark-connector-preview/python/word_count.py @@ -49,16 +49,16 @@ input_data.write \ .format('bigtable') \ .options(catalog=catalog) \ - .option('bigtable.spark.project.id', bigtable_project_id) \ - .option('bigtable.spark.instance.id', bigtable_instance_id) \ - .option('bigtable.spark.create.new.table', create_new_table) \ + .option('spark.bigtable.project.id', bigtable_project_id) \ + .option('spark.bigtable.instance.id', bigtable_instance_id) \ + .option('spark.bigtable.create.new.table', create_new_table) \ .save() print('DataFrame was written to Bigtable.') records = spark.read \ .format('bigtable') \ - .option('bigtable.spark.project.id', bigtable_project_id) \ - .option('bigtable.spark.instance.id', bigtable_instance_id) \ + .option('spark.bigtable.project.id', bigtable_project_id) \ + .option('spark.bigtable.instance.id', bigtable_instance_id) \ .options(catalog=catalog) \ .load() diff --git a/bigtable/spark-connector-preview/scala-sbt/README.md b/bigtable/spark-connector-preview/scala-sbt/README.md index 135b42caa95..87786b4fa29 100644 --- a/bigtable/spark-connector-preview/scala-sbt/README.md +++ b/bigtable/spark-connector-preview/scala-sbt/README.md @@ -1,4 +1,4 @@ -# Bigtable Spark Example Using Scala and sbt +# Spark Bigtable Example Using Scala and sbt This example uses Scala and sbt for package management to write data to a Bigtable table and read it back. @@ -14,7 +14,7 @@ sbt clean assembly ``` The target JAR will be located under -`target/scala-2.12/bigtable-spark-example-assembly-0.1.jar`. +`target/scala-2.12/spark-bigtable-example-assembly-0.1.jar`. 
## Running the example using Dataproc @@ -28,14 +28,14 @@ To run the job using dataproc, you can run the following command: ``` gcloud dataproc jobs submit spark \ ---cluster=$BIGTABLE_SPARK_DATAPROC_CLUSTER \ ---region=$BIGTABLE_SPARK_DATAPROC_REGION \ ---class=bigtable.spark.example.WordCount \ ---jars=target/scala-2.12/bigtable-spark-example-assembly-0.1.jar \ +--cluster=$SPARK_BIGTABLE_DATAPROC_CLUSTER \ +--region=$SPARK_BIGTABLE_DATAPROC_REGION \ +--class=spark.bigtable.example.WordCount \ +--jars=target/scala-2.12/spark-bigtable-example-assembly-0.1.jar \ -- \ -$BIGTABLE_SPARK_PROJECT_ID \ -$BIGTABLE_SPARK_INSTANCE_ID \ -$BIGTABLE_SPARK_TABLE_NAME +$SPARK_BIGTABLE_PROJECT_ID \ +$SPARK_BIGTABLE_INSTANCE_ID \ +$SPARK_BIGTABLE_TABLE_NAME ``` ## Expected output @@ -65,8 +65,8 @@ To verify that the data has been written to Bigtable, you can run the following command (requires [cbt CLI](https://cloud.google.com/bigtable/docs/cbt-overview)): ``` -cbt -project=$BIGTABLE_SPARK_PROJECT_ID -instance=$BIGTABLE_SPARK_INSTANCE_ID \ -read $BIGTABLE_SPARK_TABLE_NAME +cbt -project=$SPARK_BIGTABLE_PROJECT_ID -instance=$SPARK_BIGTABLE_INSTANCE_ID \ +read $SPARK_BIGTABLE_TABLE_NAME ``` With this expected output: diff --git a/bigtable/spark-connector-preview/scala-sbt/build.sbt b/bigtable/spark-connector-preview/scala-sbt/build.sbt index f0bcfa80436..4509d175ea2 100644 --- a/bigtable/spark-connector-preview/scala-sbt/build.sbt +++ b/bigtable/spark-connector-preview/scala-sbt/build.sbt @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -name := "bigtable-spark-example" +name := "spark-bigtable-example" version := "0.1" scalaVersion := "2.12.10" val sparkVersion = "3.0.1" -resolvers += "Private preview artifacts" at "artifactregistry://us-central1-maven.pkg.dev/cloud-bigtable-ecosystem/bigtable-spark-connector-preview" +resolvers += "Private preview artifacts" at "artifactregistry://us-central1-maven.pkg.dev/cloud-bigtable-ecosystem/spark-bigtable-connector-preview" -libraryDependencies += "com.google.cloud.bigtable" % "bigtable-spark" % "0.0.1-preview1-SNAPSHOT" +libraryDependencies += "com.google.cloud.spark.bigtable" % "spark-bigtable" % "0.0.1-preview2-SNAPSHOT" libraryDependencies ++= Seq( "org.apache.spark" %% "spark-sql" % sparkVersion % Provided, diff --git a/bigtable/spark-connector-preview/scala-sbt/src/main/scala/bigtable/spark/example/WordCount.scala b/bigtable/spark-connector-preview/scala-sbt/src/main/scala/spark/bigtable/example/WordCount.scala similarity index 84% rename from bigtable/spark-connector-preview/scala-sbt/src/main/scala/bigtable/spark/example/WordCount.scala rename to bigtable/spark-connector-preview/scala-sbt/src/main/scala/spark/bigtable/example/WordCount.scala index 7348d7c7aff..9b7c423d283 100644 --- a/bigtable/spark-connector-preview/scala-sbt/src/main/scala/bigtable/spark/example/WordCount.scala +++ b/bigtable/spark-connector-preview/scala-sbt/src/main/scala/spark/bigtable/example/WordCount.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package bigtable.spark.example +package spark.bigtable.example import org.apache.spark.sql.SparkSession @@ -44,9 +44,9 @@ object WordCount extends App { .write .format("bigtable") .option("catalog", catalog) - .option("bigtable.spark.project.id", projectId) - .option("bigtable.spark.instance.id", instanceId) - .option("bigtable.spark.create.new.table", createNewTable) + .option("spark.bigtable.project.id", projectId) + .option("spark.bigtable.instance.id", instanceId) + .option("spark.bigtable.create.new.table", createNewTable) .save println("DataFrame was written to Bigtable.") @@ -54,8 +54,8 @@ object WordCount extends App { .read .format("bigtable") .option("catalog", catalog) - .option("bigtable.spark.project.id", projectId) - .option("bigtable.spark.instance.id", instanceId) + .option("spark.bigtable.project.id", projectId) + .option("spark.bigtable.instance.id", instanceId) .load println("Reading the DataFrame from Bigtable:"); @@ -64,13 +64,13 @@ object WordCount extends App { def parse(args: Array[String]): (String, String, String, String) = { import scala.util.Try val projectId = Try(args(0)).getOrElse { - throw new IllegalArgumentException("Missing command-line argument: BIGTABLE_SPARK_PROJECT_ID") + throw new IllegalArgumentException("Missing command-line argument: SPARK_BIGTABLE_PROJECT_ID") } val instanceId = Try(args(1)).getOrElse { - throw new IllegalArgumentException("Missing command-line argument: BIGTABLE_SPARK_INSTANCE_ID") + throw new IllegalArgumentException("Missing command-line argument: SPARK_BIGTABLE_INSTANCE_ID") } val tableName = Try(args(2)).getOrElse { - throw new IllegalArgumentException("Missing command-line argument: BIGTABLE_SPARK_TABLE_NAME") + throw new IllegalArgumentException("Missing command-line argument: SPARK_BIGTABLE_TABLE_NAME") } val createNewTable = Try(args(3)).getOrElse { "true" From 1f0abf616c8cfead1cc3152da76daac9d7946bab Mon Sep 17 00:00:00 2001 From: Reza Karegar Date: Thu, 11 Apr 2024 15:07:16 -0400 Subject: 
[PATCH 4/6] Update the Java example to use the preview5 version --- bigtable/spark-connector-preview/java-maven/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigtable/spark-connector-preview/java-maven/pom.xml b/bigtable/spark-connector-preview/java-maven/pom.xml index 8ae0f0b6eba..451195f8f66 100644 --- a/bigtable/spark-connector-preview/java-maven/pom.xml +++ b/bigtable/spark-connector-preview/java-maven/pom.xml @@ -28,7 +28,7 @@ 2.12 3.1.2 1.7.36 - 0.0.1-preview2-SNAPSHOT + 0.0.1-preview5-SNAPSHOT 1.8 1.8 @@ -105,4 +105,4 @@ - \ No newline at end of file + From d3e3e63d417e6e217f1ca89f77a04832bdbf7f55 Mon Sep 17 00:00:00 2001 From: Reza Karegar Date: Thu, 11 Apr 2024 15:08:27 -0400 Subject: [PATCH 5/6] Update the Scala example to use the preview5 version --- bigtable/spark-connector-preview/scala-sbt/build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigtable/spark-connector-preview/scala-sbt/build.sbt b/bigtable/spark-connector-preview/scala-sbt/build.sbt index 4509d175ea2..3f00d061203 100644 --- a/bigtable/spark-connector-preview/scala-sbt/build.sbt +++ b/bigtable/spark-connector-preview/scala-sbt/build.sbt @@ -23,7 +23,7 @@ val sparkVersion = "3.0.1" resolvers += "Private preview artifacts" at "artifactregistry://us-central1-maven.pkg.dev/cloud-bigtable-ecosystem/spark-bigtable-connector-preview" -libraryDependencies += "com.google.cloud.spark.bigtable" % "spark-bigtable" % "0.0.1-preview2-SNAPSHOT" +libraryDependencies += "com.google.cloud.spark.bigtable" % "spark-bigtable" % "0.0.1-preview5-SNAPSHOT" libraryDependencies ++= Seq( "org.apache.spark" %% "spark-sql" % sparkVersion % Provided, From abab2888d5e0c06cd040b76579d3922e43388b2a Mon Sep 17 00:00:00 2001 From: Reza Karegar Date: Thu, 11 Apr 2024 15:08:46 -0400 Subject: [PATCH 6/6] Update the Python example to use the preview5 version --- bigtable/spark-connector-preview/python/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/bigtable/spark-connector-preview/python/README b/bigtable/spark-connector-preview/python/README index a41c65ac03f..be39c7072eb 100644 --- a/bigtable/spark-connector-preview/python/README +++ b/bigtable/spark-connector-preview/python/README @@ -16,7 +16,7 @@ To run the JAR using dataproc, you can run the following command: gcloud dataproc jobs submit pyspark \ --cluster=$SPARK_BIGTABLE_DATAPROC_CLUSTER \ --region=$SPARK_BIGTABLE_DATAPROC_REGION \ ---jars=gs://spark-bigtable-preview/jars/spark-bigtable-0.0.1-preview2-SNAPSHOT.jar \ +--jars=gs://spark-bigtable-preview/jars/spark-bigtable-0.0.1-preview5-SNAPSHOT.jar \ word_count.py \ -- \ --bigtableProjectId=$SPARK_BIGTABLE_PROJECT_ID \