Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/zeotuan/spark-fast-tests in…
Browse files Browse the repository at this point in the history
…to AssertApproximateSmallDf
  • Loading branch information
zeotuan committed Aug 24, 2024
2 parents 4ed351f + a48bf28 commit 1d73486
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 19 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Manually triggered workflow: builds the Laika documentation site with sbt
# and publishes the generated output to GitHub Pages.
name: Docs
on:
  workflow_dispatch:

# The deploy step pushes the generated site using GITHUB_TOKEN, so the token
# needs write access to the repository contents.
permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # NOTE(review): olafurpg/setup-scala appears to be unmaintained; consider
      # moving to actions/setup-java plus an sbt setup action - confirm first.
      - uses: olafurpg/setup-scala@v10
      - name: Build docs
        run: sbt laikaSite
      - name: Deploy to GH Pages
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./target/docs/site
20 changes: 11 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# spark-fast-tests
# Spark Fast Tests

[![CI](https://github.com/MrPowers/spark-fast-tests/actions/workflows/ci.yml/badge.svg)](https://github.com/MrPowers/spark-fast-tests/actions/workflows/ci.yml)

Expand Down Expand Up @@ -64,7 +64,7 @@ val expectedDS = Seq(
).toDS
```

![assert_small_dataset_equality_error_message](https://github.com/MrPowers/spark-fast-tests/blob/master/images/assertSmallDatasetEquality_error_message.png)
![assert_small_dataset_equality_error_message](https://raw.githubusercontent.com/mrpowers-io/spark-fast-tests/main/images/assertSmallDatasetEquality_error_message.png)

The colors in the error message make it easy to identify the rows that aren't equal.

Expand All @@ -89,12 +89,14 @@ def myLowerClean(col: Column): Column = {

Here's how long the tests take to execute:

|test method|runtime|
|-------|--------------------|
|`assertLargeDataFrameEquality`|709 milliseconds|
|`assertSmallDataFrameEquality`|166 milliseconds|
|`assertColumnEquality`|108 milliseconds|
|`evalString`|26 milliseconds|

| test method | runtime |
|--------------------------------|------------------|
| `assertLargeDataFrameEquality` | 709 milliseconds |
| `assertSmallDataFrameEquality` | 166 milliseconds |
| `assertColumnEquality` | 108 milliseconds |
| `evalString` | 26 milliseconds |


`evalString` isn't as robust, but is the fastest. `assertColumnEquality` is robust and saves a lot of time.

Expand Down Expand Up @@ -192,7 +194,7 @@ The following code will throw a `ColumnMismatch` error message:
assertColumnEquality(df, "name", "expected_name")
```

![assert_column_equality_error_message](https://github.com/MrPowers/spark-fast-tests/blob/master/images/assertColumnEquality_error_message.png)
![assert_column_equality_error_message](https://raw.githubusercontent.com/mrpowers-io/spark-fast-tests/main/images/assertColumnEquality_error_message.png)

Mix in the `ColumnComparer` trait to your test class to access the `assertColumnEquality` method:

Expand Down
63 changes: 54 additions & 9 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
enablePlugins(GitVersioning)
Compile / scalafmtOnCompile:= true
Compile / scalafmtOnCompile := true

organization := "com.github.mrpowers"
name := "spark-fast-tests"
name := "spark-fast-tests"

version := "1.10.1"

val versionRegex = """^(.*)\.(.*)\.(.*)$""".r
val versionRegex = """^(.*)\.(.*)\.(.*)$""".r

val sparkVersion = settingKey[String]("Spark version")

val scala2_13= "2.13.13"
val scala2_12= "2.12.15"
val scala2_11= "2.11.17"
val scala2_13 = "2.13.13"
val scala2_12 = "2.12.15"
val scala2_11 = "2.11.17"

sparkVersion := System.getProperty("spark.testVersion", "3.5.1")
// Scala versions to cross-build against, chosen from the Spark version under
// test (the `spark.testVersion` system property, read into `sparkVersion`).
crossScalaVersions := {
  sparkVersion.value match {
    // Spark 3.x with a minor version of 2 or higher: Scala 2.12 and 2.13.
    case versionRegex("3", m, _) if m.toInt >= 2 => Seq(scala2_12, scala2_13)
    // Any other Spark 3.x version: Scala 2.12 only.
    case versionRegex("3", _, _) => Seq(scala2_12)
    // Fail with an explicit message instead of an opaque scala.MatchError when
    // the version is not a three-part Spark 3.x version.
    case other =>
      sys.error(
        s"Unsupported spark.testVersion '$other': expected a Spark 3.x version such as 3.5.1"
      )
  }
}

scalaVersion := crossScalaVersions.value.head

libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.18" % "test"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.18" % "test"

credentials += Credentials(Path.userHome / ".sbt" / "sonatype_credentials")

Expand All @@ -45,4 +45,49 @@ publishMavenStyle := true

publishTo := sonatypePublishToBundle.value

Global / useGpgPinentry := true
Global / useGpgPinentry := true

enablePlugins(LaikaPlugin)

import laika.format.Markdown
import laika.config.SyntaxHighlighting
import laika.ast.Path.Root
import laika.ast.{Image, ExternalTarget}
import laika.helium.config._
import laika.helium.Helium

// Helium theme for the generated documentation site. Only the landing page is
// customised here; all other options come from `Helium.defaults`.
laikaTheme := Helium.defaults.site
  .landingPage(
    title = Some("Spark Fast Tests"),
    subtitle = Some("Unit testing your Apache Spark application"),
    latestReleases = Seq(
      // Taken from the build's `version` setting so the landing page stays in
      // step with the release being built.
      ReleaseInfo("Latest Stable Release", version.value)
    ),
    // NOTE(review): confirm this label matches the repository's LICENSE file.
    license = Some("Apache 2-0"),
    titleLinks = Seq(
      VersionMenu.create(unversionedLabel = "Getting Started"),
      LinkGroup.create(
        IconLink.external("https://github.com/mrpowers-io/spark-fast-tests", HeliumIcon.github)
      )
    ),
    linkPanel = Some(
      LinkPanel(
        "Documentation",
        TextLink.internal(Root / "about" / "README.md", "Spark Fast Tests")
      )
    ),
    projectLinks = Seq(
      LinkGroup.create(
        // Points at the package index page of the API documentation.
        TextLink.internal(
          Root / "api" / "com" / "github" / "mrpowers" / "spark" / "fast" / "tests" / "index.html",
          "API (Scaladoc)"
        )
      )
    ),
    teasers = Seq(
      Teaser("Fast", "Handle small dataframes effectively and provide column assertions"),
      Teaser("Flexible", "Works fine with scalatest, uTest, munit")
    )
  )
  .build

// Bundle the generated API documentation (scaladoc) with the Laika site.
laikaIncludeAPI := true
// Enable the GitHub-flavoured Markdown and syntax-highlighting extensions.
laikaExtensions ++= Seq(Markdown.GitHubFlavor, SyntaxHighlighting)
// Read the site's input documents from the `docs` directory under the build's base directory.
Laika / sourceDirectories := Seq((ThisBuild / baseDirectory).value / "docs")
1 change: 1 addition & 0 deletions docs/about/README.md
4 changes: 3 additions & 1 deletion project/plugins.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@ addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")

addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.12")

addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1")
addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1")

addSbtPlugin("org.typelevel" % "laika-sbt" % "1.2.0")

0 comments on commit 1d73486

Please sign in to comment.