diff --git a/ratatool-diffy/src/main/scala/com/spotify/ratatool/diffy/BigDiffy.scala b/ratatool-diffy/src/main/scala/com/spotify/ratatool/diffy/BigDiffy.scala index 52ca5294..9cca7ef1 100644 --- a/ratatool-diffy/src/main/scala/com/spotify/ratatool/diffy/BigDiffy.scala +++ b/ratatool-diffy/src/main/scala/com/spotify/ratatool/diffy/BigDiffy.scala @@ -29,7 +29,7 @@ import com.spotify.scio.values.SCollection import com.twitter.algebird._ import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord -import org.apache.beam.sdk.io.{FileSystems, TextIO} +import org.apache.beam.sdk.io.TextIO import scala.annotation.tailrec import scala.collection.JavaConverters._ @@ -383,7 +383,7 @@ object BigDiffy extends Command { | | --input-mode=(avro|bigquery) Diff-ing Avro or BQ records | [--output-mode=(gcs|bigquery)] Saves to a text file in GCS or a BigQuery dataset. Defaults to GCS - | --key= '.' separated key field + | --key= '.' separated key field. Specify multiple --key params for multi key usage. | --lhs= LHS File path or BigQuery table | --rhs= RHS File path or BigQuery table | --output= File path prefix for output @@ -411,7 +411,7 @@ object BigDiffy extends Command { sys.exit(1) } - private def avroKeyFn(keys: Seq[String]): GenericRecord => String = { + private[diffy] def avroKeyFn(keys: Seq[String]): GenericRecord => String = { @tailrec def get(xs: Array[String], i: Int, r: GenericRecord): String = if (i == xs.length - 1) { @@ -424,7 +424,7 @@ object BigDiffy extends Command { (r: GenericRecord) => xs.map { x => get(x, 0, r) }.mkString("_") } - private def tableRowKeyFn(keys: Seq[String]): TableRow => String = { + private[diffy] def tableRowKeyFn(keys: Seq[String]): TableRow => String = { @tailrec def get(xs: Array[String], i: Int, r: java.util.Map[String, AnyRef]): String = if (i == xs.length - 1) { diff --git a/ratatool-diffy/src/test/scala/com/spotify/ratatool/diffy/BigDiffyTest.scala b/ratatool-diffy/src/test/scala/com/spotify/ratatool/diffy/BigDiffyTest.scala index b37bb7a3..cd1f399c 100644 --- a/ratatool-diffy/src/test/scala/com/spotify/ratatool/diffy/BigDiffyTest.scala +++ b/ratatool-diffy/src/test/scala/com/spotify/ratatool/diffy/BigDiffyTest.scala @@ -17,15 +17,18 @@ package com.spotify.ratatool.diffy -import com.spotify.ratatool.avro.specific.{RequiredNestedRecord, TestRecord} -import com.spotify.ratatool.scalacheck._ -import com.spotify.scio.testing.PipelineSpec import org.apache.avro.util.Utf8 import org.apache.beam.sdk.coders.AvroCoder import org.apache.beam.sdk.util.CoderUtils import org.scalacheck.Gen import org.scalacheck.rng.Seed +import com.spotify.ratatool.avro.specific.{RequiredNestedRecord, TestRecord} +import com.spotify.ratatool.scalacheck._ +import com.spotify.scio.testing.PipelineSpec + +import com.google.api.services.bigquery.model.TableRow + class BigDiffyTest extends PipelineSpec { val keys = (1 to 1000).map("key" + _) @@ -116,4 +119,41 @@ class BigDiffyTest extends PipelineSpec { } } + "BigDiffy avroKeyFn" should "work with single key" in { + val record = specificRecordOf[TestRecord].sample.get + val keyValue = BigDiffy.avroKeyFn(Seq("required_fields.int_field"))(record) + + keyValue shouldBe record.getRequiredFields.getIntField.toString + } + + "BigDiffy avroKeyFn" should "work with multiple key" in { + val record = specificRecordOf[TestRecord].sample.get + val keys = Seq("required_fields.int_field", "required_fields.double_field") + val keyValues = BigDiffy.avroKeyFn(keys)(record) + val expectedKey = + s"${record.getRequiredFields.getIntField}_${record.getRequiredFields.getDoubleField}" + + keyValues shouldBe expectedKey + } + + "BigDiffy tableRowKeyFn" should "work with single key" in { + val record = new TableRow() + record.set("key", "foo") + val keyValue = BigDiffy.tableRowKeyFn(Seq("key"))(record) + + keyValue shouldBe "foo" + } + + "BigDiffy tableRowKeyFn" should "work with multiple key" in { + val subRecord = new TableRow() + subRecord.set("key", "foo") + subRecord.set("other_key", "bar") + val record = new TableRow() + record.set("record", subRecord) + + val keys = Seq("record.key", "record.other_key") + val keyValues = BigDiffy.tableRowKeyFn(keys)(record.asInstanceOf[TableRow]) + + keyValues shouldBe "foo_bar" + } }