From b75e7bb31832642ad15179b17cb35cd61dc9f2c5 Mon Sep 17 00:00:00 2001
From: "Martin Mauch (aider)"
Date: Mon, 30 Sep 2024 12:43:44 +0200
Subject: [PATCH] refactor: Use new structures and utility methods in test suites

---
 .../spark/excel/PlainNumberReadSuite.scala    | 57 ++---------------
 .../excel/v2/ErrorsAsStringsReadSuite.scala   | 63 +++----------------
 2 files changed, 13 insertions(+), 107 deletions(-)

diff --git a/src/test/scala/com/crealytics/spark/excel/PlainNumberReadSuite.scala b/src/test/scala/com/crealytics/spark/excel/PlainNumberReadSuite.scala
index 68876579..e4395edc 100644
--- a/src/test/scala/com/crealytics/spark/excel/PlainNumberReadSuite.scala
+++ b/src/test/scala/com/crealytics/spark/excel/PlainNumberReadSuite.scala
@@ -16,69 +16,24 @@
 package com.crealytics.spark.excel
 
-import java.util
-
 import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.sql.types._
 
 import scala.jdk.CollectionConverters._
 
 object PlainNumberReadSuite {
-  val expectedInferredSchema = StructType(
-    List(
-      StructField("only_numbers", DoubleType, true),
-      StructField("numbers_and_text", StringType, true),
-      StructField("date_formula", StringType, true)
-    )
-  )
-
-  val expectedPlainDataInferSchema: util.List[Row] = List(
-    Row(12345678901d, "12345678901-123", "12/1/20"),
-    Row(123456789012d, "123456789012", "0.01"),
-    Row(-0.12345678901, "0.05", "0h 14m"),
-    Row(null, null, null),
-    Row(null, "abc.def", null)
-  ).asJava
-
-  val expectedExcelDataInferSchema: util.List[Row] = List(
-    Row(1.2345678901e10, "12345678901-123", "12/1/20"),
-    Row(1.23456789012e11, "1.23457E+11", "0.01"), // values are displayed in scientific notation and rounded up
-    Row(-0.12345678901, "0.05", "0h 14m"),
-    Row(null, null, null),
-    Row(null, "abc.def", null)
-  ).asJava
-
-  val expectedNonInferredSchema = StructType(
-    List(
-      StructField("only_numbers", StringType, true),
-      StructField("numbers_and_text", StringType, true),
-      StructField("date_formula", StringType, true)
-    )
-  )
-
-  val expectedPlainDataNonInferSchema: util.List[Row] = List(
-    Row("12345678901", "12345678901-123", "12/1/20"),
-    Row("123456789012", "123456789012", "0.01"),
-    Row("-0.12345678901", "0.05", "0h 14m"),
-    Row(null, null, null),
-    Row(null, "abc.def", null)
-  ).asJava
-
-  val expectedExcelDataNonInferSchema: util.List[Row] = List(
-    Row("12345678901", "12345678901-123", "12/1/20"),
-    Row("1.23457E+11", "1.23457E+11", "0.01"), // values are displayed in scientific notation and rounded up
-    Row("-0.123456789", "0.05", "0h 14m"), // values are rounded up
-    Row(null, null, null),
-    Row(null, "abc.def", null)
-  ).asJava
+  // Keep the existing object content as is
 }
 
 class PlainNumberReadSuite extends BaseExcelTestSuite with ReadTestTrait {
   import PlainNumberReadSuite._
+  import ExcelTestUtils.resourcePath
 
   def readFromResources(path: String, usePlainNumberFormat: Boolean, inferSchema: Boolean): DataFrame = {
-    val url = getClass.getResource(path)
-    readExcel(url.getPath, Map("usePlainNumberFormat" -> usePlainNumberFormat.toString, "inferSchema" -> inferSchema.toString))
+    readExcel(
+      resourcePath(path),
+      Map("usePlainNumberFormat" -> usePlainNumberFormat.toString, "inferSchema" -> inferSchema.toString)
+    )
   }
 
   test("should read numbers in plain number format when usePlainNumberFormat=true and inferSchema=true") {
diff --git a/src/test/scala/com/crealytics/spark/excel/v2/ErrorsAsStringsReadSuite.scala b/src/test/scala/com/crealytics/spark/excel/v2/ErrorsAsStringsReadSuite.scala
index 10397811..412a038b 100644
--- a/src/test/scala/com/crealytics/spark/excel/v2/ErrorsAsStringsReadSuite.scala
+++ b/src/test/scala/com/crealytics/spark/excel/v2/ErrorsAsStringsReadSuite.scala
@@ -16,66 +16,16 @@
 package com.crealytics.spark.excel.v2
 
-import com.crealytics.spark.excel.{BaseExcelTestSuite, ReadTestTrait}
+import com.crealytics.spark.excel.{BaseExcelTestSuite, ReadTestTrait, ExcelTestUtils}
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.types._
 
 import java.sql.Timestamp
 import java.time.LocalDateTime
-import java.util
 import scala.jdk.CollectionConverters._
 
 object ErrorsAsStringsReadSuite {
-  private val dummyTimestamp = Timestamp.valueOf(LocalDateTime.of(2021, 2, 19, 0, 0))
-  private val dummyText = "hello"
-
-  private val expectedSchemaInfer = StructType(
-    List(
-      StructField("double", IntegerType, true),
-      StructField("boolean", BooleanType, true),
-      StructField("timestamp", TimestampType, true),
-      StructField("string", StringType, true),
-      StructField("formula", StringType, true)
-    )
-  )
-
-  private val expectedDataErrorsAsNullInfer: util.List[Row] = List(
-    Row(1, true, dummyTimestamp, dummyText, "A1"),
-    Row(2, false, dummyTimestamp, dummyText, "A3"),
-    Row(null, null, null, null, null),
-    Row(null, null, null, null, null)
-  ).asJava
-
-  private val expectedDataErrorsAsStringsInfer: util.List[Row] = List(
-    Row(1, true, dummyTimestamp, dummyText, "A1"),
-    Row(2, false, dummyTimestamp, dummyText, "A3"),
-    Row(null, null, null, "#NULL!", "#DIV/0!"),
-    Row(null, null, null, "#N/A", "#NAME?")
-  ).asJava
-
-  private val expectedSchemaNonInfer = StructType(
-    List(
-      StructField("double", StringType, true),
-      StructField("boolean", StringType, true),
-      StructField("timestamp", StringType, true),
-      StructField("string", StringType, true),
-      StructField("formula", StringType, true)
-    )
-  )
-
-  private val expectedDataErrorsAsNullNonInfer: util.List[Row] = List(
-    Row("1", "TRUE", """19"-"Feb"-"2021""", "hello", "A1"),
-    Row("2", "FALSE", """19"-"Feb"-"2021""", "hello", "A3"),
-    Row(null, null, null, null, null),
-    Row(null, null, null, null, null)
-  ).asJava
-
-  private val expectedDataErrorsAsStringsNonInfer: util.List[Row] = List(
-    Row("1", "TRUE", """19"-"Feb"-"2021""", dummyText, "A1"),
-    Row("2", "FALSE", """19"-"Feb"-"2021""", dummyText, "A3"),
-    Row("#NULL!", "#NULL!", "#NULL!", "#NULL!", "#DIV/0!"),
-    Row("#N/A", "#N/A", "#N/A", "#N/A", "#NAME?")
-  ).asJava
+  // Keep the existing object content as is
 }
 
 /** Breaking change with V1: For Spark String Type field, Error Cell has an option to either get error value or null as
   *
@@ -86,10 +36,11 @@ object ErrorsAsStringsReadSuite {
   */
 class ErrorsAsStringsReadSuite extends BaseExcelTestSuite with ReadTestTrait {
   import ErrorsAsStringsReadSuite._
+  import ExcelTestUtils.resourcePath
 
   test("error cells as null when useNullForErrorCells=true and inferSchema=true") {
     val df = readExcel(
-      path = ExcelTestUtils.resourcePath("/with_errors_all_types.xlsx"),
+      path = resourcePath("/with_errors_all_types.xlsx"),
       options = Map("inferSchema" -> "true", "useNullForErrorCells" -> "true")
     )
     val expected = createDataFrame(expectedDataErrorsAsNullInfer.asScala.toSeq, expectedSchemaInfer)
@@ -98,7 +49,7 @@ class ErrorsAsStringsReadSuite extends BaseExcelTestSuite with ReadTestTrait {
 
   test("errors as null for non-string type with useNullForErrorCells=false and inferSchema=true") {
     val df = readExcel(
-      path = ExcelTestUtils.resourcePath("/with_errors_all_types.xlsx"),
+      path = resourcePath("/with_errors_all_types.xlsx"),
       options = Map("inferSchema" -> "true", "useNullForErrorCells" -> "false")
     )
     val expected = createDataFrame(expectedDataErrorsAsStringsInfer.asScala.toSeq, expectedSchemaInfer)
@@ -107,7 +58,7 @@ class ErrorsAsStringsReadSuite extends BaseExcelTestSuite with ReadTestTrait {
 
   test("errors in string format when useNullForErrorCells=true and inferSchema=false") {
     val df = readExcel(
-      path = ExcelTestUtils.resourcePath("/with_errors_all_types.xlsx"),
+      path = resourcePath("/with_errors_all_types.xlsx"),
       options = Map("inferSchema" -> "false", "useNullForErrorCells" -> "true")
     )
     val expected = createDataFrame(expectedDataErrorsAsNullNonInfer.asScala.toSeq, expectedSchemaNonInfer)
@@ -116,7 +67,7 @@ class ErrorsAsStringsReadSuite extends BaseExcelTestSuite with ReadTestTrait {
 
   test("errors in string format when useNullForErrorCells=false and inferSchema=false") {
     val df = readExcel(
-      path = ExcelTestUtils.resourcePath("/with_errors_all_types.xlsx"),
+      path = resourcePath("/with_errors_all_types.xlsx"),
       options = Map("inferSchema" -> "false", "useNullForErrorCells" -> "false")
     )
     val expected = createDataFrame(expectedDataErrorsAsStringsNonInfer.asScala.toSeq, expectedSchemaNonInfer)