Commit

refactor: Use new structures and utility methods in test suites
nightscape committed Sep 30, 2024
1 parent dd9def2 commit b75e7bb
Showing 2 changed files with 13 additions and 107 deletions.
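
The refactor points both suites at shared test helpers: ExcelTestUtils.resourcePath for locating test resources, plus the readExcel and createDataFrame methods inherited from BaseExcelTestSuite with ReadTestTrait. Those helpers are not part of this diff; the following is a minimal sketch, assuming a SparkSession-backed test base, of the shapes the calls below imply (names and signatures are inferred from the call sites, not taken from the repository):

import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.sql.types.StructType

object ExcelTestUtils {
  // Resolve a classpath resource such as "/with_errors_all_types.xlsx" to a filesystem path.
  def resourcePath(path: String): String = getClass.getResource(path).getPath
}

// Sketch only: the real base class presumably also extends a ScalaTest suite (e.g. AnyFunSuite).
trait BaseExcelTestSuite {
  def spark: SparkSession

  // Read an Excel file through the spark-excel data source with the given options.
  def readExcel(path: String, options: Map[String, String] = Map.empty): DataFrame =
    spark.read.format("excel").options(options).load(path)

  // Build a DataFrame from expected rows and an explicit schema, for comparison against the read result.
  def createDataFrame(rows: Seq[Row], schema: StructType): DataFrame =
    spark.createDataFrame(spark.sparkContext.parallelize(rows), schema)
}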
@@ -16,69 +16,24 @@

package com.crealytics.spark.excel

import java.util

import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.types._

import scala.jdk.CollectionConverters._

object PlainNumberReadSuite {
  val expectedInferredSchema = StructType(
    List(
      StructField("only_numbers", DoubleType, true),
      StructField("numbers_and_text", StringType, true),
      StructField("date_formula", StringType, true)
    )
  )

  val expectedPlainDataInferSchema: util.List[Row] = List(
    Row(12345678901d, "12345678901-123", "12/1/20"),
    Row(123456789012d, "123456789012", "0.01"),
    Row(-0.12345678901, "0.05", "0h 14m"),
    Row(null, null, null),
    Row(null, "abc.def", null)
  ).asJava

  val expectedExcelDataInferSchema: util.List[Row] = List(
    Row(1.2345678901e10, "12345678901-123", "12/1/20"),
    Row(1.23456789012e11, "1.23457E+11", "0.01"), // values are displayed in scientific notation and rounded up
    Row(-0.12345678901, "0.05", "0h 14m"),
    Row(null, null, null),
    Row(null, "abc.def", null)
  ).asJava

  val expectedNonInferredSchema = StructType(
    List(
      StructField("only_numbers", StringType, true),
      StructField("numbers_and_text", StringType, true),
      StructField("date_formula", StringType, true)
    )
  )

  val expectedPlainDataNonInferSchema: util.List[Row] = List(
    Row("12345678901", "12345678901-123", "12/1/20"),
    Row("123456789012", "123456789012", "0.01"),
    Row("-0.12345678901", "0.05", "0h 14m"),
    Row(null, null, null),
    Row(null, "abc.def", null)
  ).asJava

  val expectedExcelDataNonInferSchema: util.List[Row] = List(
    Row("12345678901", "12345678901-123", "12/1/20"),
    Row("1.23457E+11", "1.23457E+11", "0.01"), // values are displayed in scientific notation and rounded up
    Row("-0.123456789", "0.05", "0h 14m"), // values are rounded up
    Row(null, null, null),
    Row(null, "abc.def", null)
  ).asJava
  // Keep the existing object content as is
}

class PlainNumberReadSuite extends BaseExcelTestSuite with ReadTestTrait {
  import PlainNumberReadSuite._
  import ExcelTestUtils.resourcePath

  def readFromResources(path: String, usePlainNumberFormat: Boolean, inferSchema: Boolean): DataFrame = {
    val url = getClass.getResource(path)
    readExcel(url.getPath, Map("usePlainNumberFormat" -> usePlainNumberFormat.toString, "inferSchema" -> inferSchema.toString))
    readExcel(
      resourcePath(path),
      Map("usePlainNumberFormat" -> usePlainNumberFormat.toString, "inferSchema" -> inferSchema.toString)
    )
  }

  test("should read numbers in plain number format when usePlainNumberFormat=true and inferSchema=true") {
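
For reference, the expected data above encodes the behavioural difference this suite asserts: with usePlainNumberFormat=true values keep their full plain representation, while with usePlainNumberFormat=false they follow Excel's display formatting (rounded scientific notation). A usage sketch reusing the suite's own helpers; the resource name is illustrative, since the file used by the collapsed test bodies is not shown in this diff:

val plain = readExcel(
  resourcePath("/plain_number.xlsx"), // hypothetical resource name
  Map("usePlainNumberFormat" -> "true", "inferSchema" -> "false")
)
// only_numbers column -> "123456789012" (full precision, as stored in the sheet)

val formatted = readExcel(
  resourcePath("/plain_number.xlsx"), // hypothetical resource name
  Map("usePlainNumberFormat" -> "false", "inferSchema" -> "false")
)
// only_numbers column -> "1.23457E+11" (Excel's rounded scientific-notation display value)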
@@ -16,66 +16,16 @@

package com.crealytics.spark.excel.v2

import com.crealytics.spark.excel.{BaseExcelTestSuite, ReadTestTrait}
import com.crealytics.spark.excel.{BaseExcelTestSuite, ReadTestTrait, ExcelTestUtils}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import java.sql.Timestamp
import java.time.LocalDateTime
import java.util
import scala.jdk.CollectionConverters._

object ErrorsAsStringsReadSuite {
  private val dummyTimestamp = Timestamp.valueOf(LocalDateTime.of(2021, 2, 19, 0, 0))
  private val dummyText = "hello"

  private val expectedSchemaInfer = StructType(
    List(
      StructField("double", IntegerType, true),
      StructField("boolean", BooleanType, true),
      StructField("timestamp", TimestampType, true),
      StructField("string", StringType, true),
      StructField("formula", StringType, true)
    )
  )

  private val expectedDataErrorsAsNullInfer: util.List[Row] = List(
    Row(1, true, dummyTimestamp, dummyText, "A1"),
    Row(2, false, dummyTimestamp, dummyText, "A3"),
    Row(null, null, null, null, null),
    Row(null, null, null, null, null)
  ).asJava

  private val expectedDataErrorsAsStringsInfer: util.List[Row] = List(
    Row(1, true, dummyTimestamp, dummyText, "A1"),
    Row(2, false, dummyTimestamp, dummyText, "A3"),
    Row(null, null, null, "#NULL!", "#DIV/0!"),
    Row(null, null, null, "#N/A", "#NAME?")
  ).asJava

  private val expectedSchemaNonInfer = StructType(
    List(
      StructField("double", StringType, true),
      StructField("boolean", StringType, true),
      StructField("timestamp", StringType, true),
      StructField("string", StringType, true),
      StructField("formula", StringType, true)
    )
  )

  private val expectedDataErrorsAsNullNonInfer: util.List[Row] = List(
    Row("1", "TRUE", """19"-"Feb"-"2021""", "hello", "A1"),
    Row("2", "FALSE", """19"-"Feb"-"2021""", "hello", "A3"),
    Row(null, null, null, null, null),
    Row(null, null, null, null, null)
  ).asJava

  private val expectedDataErrorsAsStringsNonInfer: util.List[Row] = List(
    Row("1", "TRUE", """19"-"Feb"-"2021""", dummyText, "A1"),
    Row("2", "FALSE", """19"-"Feb"-"2021""", dummyText, "A3"),
    Row("#NULL!", "#NULL!", "#NULL!", "#NULL!", "#DIV/0!"),
    Row("#N/A", "#N/A", "#N/A", "#N/A", "#NAME?")
  ).asJava
  // Keep the existing object content as is
}

/** Breaking change with V1: For Spark String Type field, Error Cell has an option to either get error value or null as
@@ -86,10 +36,11 @@ object ErrorsAsStringsReadSuite
*/
class ErrorsAsStringsReadSuite extends BaseExcelTestSuite with ReadTestTrait {
  import ErrorsAsStringsReadSuite._
  import ExcelTestUtils.resourcePath

  test("error cells as null when useNullForErrorCells=true and inferSchema=true") {
    val df = readExcel(
      path = ExcelTestUtils.resourcePath("/with_errors_all_types.xlsx"),
      path = resourcePath("/with_errors_all_types.xlsx"),
      options = Map("inferSchema" -> "true", "useNullForErrorCells" -> "true")
    )
    val expected = createDataFrame(expectedDataErrorsAsNullInfer.asScala.toSeq, expectedSchemaInfer)
@@ -98,7 +49,7 @@ class ErrorsAsStringsReadSuite extends BaseExcelTestSuite with ReadTestTrait {

test("errors as null for non-string type with useNullForErrorCells=false and inferSchema=true") {
val df = readExcel(
path = ExcelTestUtils.resourcePath("/with_errors_all_types.xlsx"),
path = resourcePath("/with_errors_all_types.xlsx"),
options = Map("inferSchema" -> "true", "useNullForErrorCells" -> "false")
)
val expected = createDataFrame(expectedDataErrorsAsStringsInfer.asScala.toSeq, expectedSchemaInfer)
@@ -107,7 +58,7 @@ class ErrorsAsStringsReadSuite extends BaseExcelTestSuite with ReadTestTrait {

test("errors in string format when useNullForErrorCells=true and inferSchema=false") {
val df = readExcel(
path = ExcelTestUtils.resourcePath("/with_errors_all_types.xlsx"),
path = resourcePath("/with_errors_all_types.xlsx"),
options = Map("inferSchema" -> "false", "useNullForErrorCells" -> "true")
)
val expected = createDataFrame(expectedDataErrorsAsNullNonInfer.asScala.toSeq, expectedSchemaNonInfer)
@@ -116,7 +67,7 @@ class ErrorsAsStringsReadSuite extends BaseExcelTestSuite with ReadTestTrait {

test("errors in string format when useNullForErrorCells=false and inferSchema=false") {
val df = readExcel(
path = ExcelTestUtils.resourcePath("/with_errors_all_types.xlsx"),
path = resourcePath("/with_errors_all_types.xlsx"),
options = Map("inferSchema" -> "false", "useNullForErrorCells" -> "false")
)
val expected = createDataFrame(expectedDataErrorsAsStringsNonInfer.asScala.toSeq, expectedSchemaNonInfer)
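
The four tests above walk the inferSchema x useNullForErrorCells matrix referenced by the (truncated) Scaladoc: error cells such as #DIV/0! or #N/A either surface as their literal error text in string-typed columns or come back as null. A brief usage sketch with the suite's own helpers:

val errorsAsStrings = readExcel(
  path = resourcePath("/with_errors_all_types.xlsx"),
  options = Map("inferSchema" -> "false", "useNullForErrorCells" -> "false")
)
// string-typed columns keep the literal error text, e.g. "#NULL!", "#DIV/0!", "#NAME?"

val errorsAsNull = readExcel(
  path = resourcePath("/with_errors_all_types.xlsx"),
  options = Map("inferSchema" -> "true", "useNullForErrorCells" -> "true")
)
// error cells come back as null; with inferSchema=true, non-string columns
// (double, boolean, timestamp) are null for error cells regardless of the option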
