Skip to content

Commit

Permalink
Add tests for *keyFn functions
Browse files Browse the repository at this point in the history
  • Loading branch information
martinbomio committed Dec 12, 2018
1 parent 500f292 commit db1a2ab
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import com.spotify.scio.values.SCollection
import com.twitter.algebird._
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.beam.sdk.io.{FileSystems, TextIO}
import org.apache.beam.sdk.io.TextIO

import scala.annotation.tailrec
import scala.collection.JavaConverters._
Expand Down Expand Up @@ -383,7 +383,7 @@ object BigDiffy extends Command {
|
| --input-mode=(avro|bigquery) Diff-ing Avro or BQ records
| [--output-mode=(gcs|bigquery)] Saves to a text file in GCS or a BigQuery dataset. Defaults to GCS
| --key=<key> '.' separated key field
| --key=<key> '.' separated key field. Specify multiple --key params for multi key usage.
| --lhs=<path> LHS File path or BigQuery table
| --rhs=<path> RHS File path or BigQuery table
| --output=<output> File path prefix for output
Expand Down Expand Up @@ -411,7 +411,7 @@ object BigDiffy extends Command {
sys.exit(1)
}

private def avroKeyFn(keys: Seq[String]): GenericRecord => String = {
private[diffy] def avroKeyFn(keys: Seq[String]): GenericRecord => String = {
@tailrec
def get(xs: Array[String], i: Int, r: GenericRecord): String =
if (i == xs.length - 1) {
Expand All @@ -424,7 +424,7 @@ object BigDiffy extends Command {
(r: GenericRecord) => xs.map { x => get(x, 0, r) }.mkString("_")
}

private def tableRowKeyFn(keys: Seq[String]): TableRow => String = {
private[diffy] def tableRowKeyFn(keys: Seq[String]): TableRow => String = {
@tailrec
def get(xs: Array[String], i: Int, r: java.util.Map[String, AnyRef]): String =
if (i == xs.length - 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,18 @@

package com.spotify.ratatool.diffy

import com.spotify.ratatool.avro.specific.{RequiredNestedRecord, TestRecord}
import com.spotify.ratatool.scalacheck._
import com.spotify.scio.testing.PipelineSpec
import org.apache.avro.util.Utf8
import org.apache.beam.sdk.coders.AvroCoder
import org.apache.beam.sdk.util.CoderUtils
import org.scalacheck.Gen
import org.scalacheck.rng.Seed

import com.spotify.ratatool.avro.specific.{RequiredNestedRecord, TestRecord}
import com.spotify.ratatool.scalacheck._
import com.spotify.scio.testing.PipelineSpec

import com.google.api.services.bigquery.model.TableRow

class BigDiffyTest extends PipelineSpec {

val keys = (1 to 1000).map("key" + _)
Expand Down Expand Up @@ -116,4 +119,41 @@ class BigDiffyTest extends PipelineSpec {
}
}

"BigDiffy avroKeyFn" should "work with single key" in {
val record = specificRecordOf[TestRecord].sample.get
val keyValue = BigDiffy.avroKeyFn(Seq("required_fields.int_field"))(record)

keyValue shouldBe record.getRequiredFields.getIntField.toString
}

"BigDiffy avroKeyFn" should "work with multiple key" in {
val record = specificRecordOf[TestRecord].sample.get
val keys = Seq("required_fields.int_field", "required_fields.double_field")
val keyValues = BigDiffy.avroKeyFn(keys)(record)
val expectedKey =
s"${record.getRequiredFields.getIntField}_${record.getRequiredFields.getDoubleField}"

keyValues shouldBe expectedKey
}

"BigDiffy tableRowKeyFn" should "work with single key" in {
val record = new TableRow()
record.set("key", "foo")
val keyValue = BigDiffy.tableRowKeyFn(Seq("key"))(record)

keyValue shouldBe "foo"
}

"BigDiffy tableRowKeyFn" should "work with multiple key" in {
val subRecord = new TableRow()
subRecord.set("key", "foo")
subRecord.set("other_key", "bar")
val record = new TableRow()
record.set("record", subRecord)

val keys = Seq("record.key", "record.other_key")
val keyValues = BigDiffy.tableRowKeyFn(keys)(record.asInstanceOf[TableRow])

keyValues shouldBe "foo_bar"
}
}

0 comments on commit db1a2ab

Please sign in to comment.