refactor database random test (pingcap#2045)

marsishandsome authored Apr 28, 2021
1 parent 97666d1 commit 3e1ec30
Showing 87 changed files with 1,168 additions and 1,641 deletions.
core-test/pom.xml: 10 changes (6 additions, 4 deletions)

@@ -14,10 +14,6 @@
   <name>TiSpark Project Core Test</name>
   <url>http://github.com/pingcap/tispark</url>

-  <properties>
-    <scalaj.version>2.3.0</scalaj.version>
-  </properties>
-
   <dependencies>
     <dependency>
       <groupId>com.pingcap.tispark</groupId>
@@ -55,6 +51,12 @@
       <artifactId>tikv-client</artifactId>
       <version>${project.parent.version}</version>
     </dependency>
+    <dependency>
+      <groupId>com.pingcap.tispark</groupId>
+      <artifactId>db-random-test</artifactId>
+      <version>${project.parent.version}</version>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.scalaj</groupId>
       <artifactId>scalaj-http_${scala.binary.version}</artifactId>
core/pom.xml: 10 changes (6 additions, 4 deletions)

@@ -14,16 +14,18 @@
   <name>TiSpark Project Core Internal</name>
   <url>http://github.com/pingcap/tispark</url>

-  <properties>
-    <scalaj.version>2.3.0</scalaj.version>
-  </properties>
-
   <dependencies>
     <dependency>
       <groupId>com.pingcap.tikv</groupId>
       <artifactId>tikv-client</artifactId>
       <version>${project.parent.version}</version>
     </dependency>
+    <dependency>
+      <groupId>com.pingcap.tispark</groupId>
+      <artifactId>db-random-test</artifactId>
+      <version>${project.parent.version}</version>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.scalaj</groupId>
       <artifactId>scalaj-http_${scala.binary.version}</artifactId>
(changed Scala test file; path not shown in this capture)

@@ -17,8 +17,8 @@

 package org.apache.spark.sql

-import org.apache.spark.sql.test.generator.DataType.{getBaseType, DECIMAL, ReflectedDataType}
-import org.apache.spark.sql.test.generator.TestDataGenerator.{
+import com.pingcap.tispark.test.generator.DataType.{getBaseType, DECIMAL, ReflectedDataType}
+import com.pingcap.tispark.test.generator.DataGenerator.{
   getDecimal,
   getLength,
   isCharOrBinary,
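This hunk is the mechanical half of the refactor: the generator utilities move out of the Spark test tree (org.apache.spark.sql.test.generator) into the shared db-random-test module (com.pingcap.tispark.test.generator), and TestDataGenerator is renamed to DataGenerator. A minimal usage sketch follows; only the imported names come from this diff, and the helper plus the exact return types are illustrative assumptions:

    import com.pingcap.tispark.test.generator.DataType.{getBaseType, DECIMAL, ReflectedDataType}
    import com.pingcap.tispark.test.generator.DataGenerator.{getDecimal, getLength, isCharOrBinary}

    // Hypothetical helper: render a length/precision suffix for a generated
    // column's DDL, assuming getLength/getDecimal return printable sizes.
    def typeSuffix(dt: ReflectedDataType): String =
      if (isCharOrBinary(dt)) s"(${getLength(dt)})"
      else if (getBaseType(dt) == DECIMAL) s"(${getDecimal(dt)})"
      else ""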
(ClusteredIndexTest; file path not shown in this capture)

@@ -15,37 +15,26 @@

 package org.apache.spark.sql.clustered

-import org.apache.spark.sql.BaseTiSparkTest
-import org.apache.spark.sql.insertion.BaseEnumerateDataTypesTestSpec
-import org.apache.spark.sql.test.generator.DataType.{BIT, BOOLEAN, ReflectedDataType}
-import org.apache.spark.sql.test.generator.TestDataGenerator._
-import org.apache.spark.sql.test.generator._
+import com.pingcap.tispark.test.generator.DataType._
+import com.pingcap.tispark.test.generator.DataGenerator._
+import com.pingcap.tispark.test.generator._
+import org.apache.spark.sql.types.BaseRandomDataTypeTest

 import scala.util.Random

-trait ClusteredIndexTest extends BaseTiSparkTest with BaseEnumerateDataTypesTestSpec {
-  protected val testDataTypes: List[ReflectedDataType] = baseDataTypes
+trait ClusteredIndexTest extends BaseRandomDataTypeTest {
+  protected val testDataTypes1: List[ReflectedDataType] =
+    List(BIT, INT, DECIMAL, TIMESTAMP, TEXT, BLOB)

-  protected val tablePrefix: String = "clustered"
+  protected val testDataTypes2: List[ReflectedDataType] =
+    List(BOOLEAN, BIGINT, DOUBLE, DATE, VARCHAR)

-  override def dbName: String = "tispark_test"
+  override protected val database: String = "clustered_index_test"

-  override def rowCount = 10
-
-  override def dataTypes: List[ReflectedDataType] = ???
-
-  override def unsignedDataTypes: List[ReflectedDataType] = ???
-
-  override def testDesc: String = ???
-
-  override def test(): Unit = ???
-
-  override def afterAll(): Unit = {
-    super.afterAll()
-  }
-
-  override def genIndex(dataTypes: List[ReflectedDataType], r: Random): List[List[Index]] = {
-    val size = dataTypes.length
+  override protected def genIndex(
+      dataTypesWithDesc: List[(ReflectedDataType, String, String)],
+      r: Random): List[List[Index]] = {
+    val size = dataTypesWithDesc.length
     var keys1: List[Index] = Nil
     var keys2: List[Index] = Nil

@@ -59,7 +48,7 @@ trait ClusteredIndexTest extends BaseTiSparkTest with BaseEnumerateDataTypesTestSpec {
     {
       var pkCol: List[IndexColumn] = Nil
       primaryKeyList.foreach { i =>
-        pkCol = if (isStringType(dataTypes(i))) {
+        pkCol = if (isStringType(dataTypesWithDesc(i)._1)) {
           PrefixColumn(i + 1, r.nextInt(4) + 2) :: pkCol
         } else {
           DefaultColumn(i + 1) :: pkCol
@@ -71,7 +60,7 @@ trait ClusteredIndexTest extends BaseTiSparkTest with BaseEnumerateDataTypesTestSpec {
     }

     {
-      val keyCol = if (isStringType(dataTypes(uniqueKey))) {
+      val keyCol = if (isStringType(dataTypesWithDesc(uniqueKey)._1)) {
        PrefixColumn(uniqueKey + 1, r.nextInt(4) + 2) :: Nil
       } else {
         DefaultColumn(uniqueKey + 1) :: Nil
@@ -84,70 +73,12 @@ trait ClusteredIndexTest extends BaseTiSparkTest with BaseEnumerateDataTypesTestSpec {
     List(keys1, keys2)
   }

-  protected def test(schema: Schema): Unit = {
-    executeTiDBSQL(s"drop table if exists `$dbName`.`${schema.tableName}`;")
-    executeTiDBSQL(schema.toString(isClusteredIndex = true))
-
-    var rc = rowCount
-    schema.columnInfo.foreach { columnInfo =>
-      if (columnInfo.dataType.equals(BIT) || columnInfo.dataType.equals(BOOLEAN)) {
-        rc = 2
-      }
-    }
+  protected def test(schemaAndData: SchemaAndData): Unit = {
+    loadToDB(schemaAndData)

-    for (insert <- toInsertSQL(schema, generateRandomRows(schema, rc, r))) {
-      try {
-        executeTiDBSQL(insert)
-      } catch {
-        case _: Throwable => println("insert fail")
-      }
-    }
-
-    val sql = s"select * from `${schema.tableName}`"
-    spark.sql(s"explain $sql").show(200, false)
-    spark.sql(s"$sql").show(200, false)
-    runTest(sql, skipJDBC = true)
-  }
-
-  private def executeTiDBSQL(sql: String): Unit = {
+    setCurrentDatabase(schemaAndData.schema.database)
+    val sql = s"select * from `${schemaAndData.schema.tableName}`"
-    println(sql)
-    tidbStmt.execute(sql)
-  }

-  private def toInsertSQL(schema: Schema, data: List[TiRow]): List[String] = {
-    data
-      .map { row =>
-        (0 until row.fieldCount())
-          .map { idx =>
-            val value = row.get(idx, schema.columnInfo(idx).generator.tiDataType)
-            toOutput(value)
-          }
-          .mkString("(", ",", ")")
-      }
-      .map { text =>
-        s"INSERT INTO `$dbName`.`${schema.tableName}` VALUES $text;"
-      }
+    runTest(sql, skipJDBC = true)
   }
-
-  private def toOutput(value: Any): String =
-    value match {
-      case null => null
-      case _: Boolean => value.toString
-      case _: Number => value.toString
-      case arr: Array[Byte] =>
-        s"X\'${arr.map { b =>
-          f"${new java.lang.Byte(b)}%02x"
-        }.mkString}\'"
-      case arr: Array[Boolean] =>
-        s"b\'${arr.map {
-          case true => "1"
-          case false => "0"
-        }.mkString}\'"
-      case ts: java.sql.Timestamp =>
-        // convert to Timestamp output with current TimeZone
-        val zonedDateTime = ts.toLocalDateTime.atZone(java.util.TimeZone.getDefault.toZoneId)
-        val milliseconds = zonedDateTime.toEpochSecond * 1000L + zonedDateTime.getNano / 1000000
-        s"\'${new java.sql.Timestamp(milliseconds)}\'"
-      case _ => s"\'$value\'"
-    }
 }
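Two things fall out of this hunk. First, the hand-rolled SQL plumbing (executeTiDBSQL, toInsertSQL, and the toOutput literal encoder that rendered Array[Byte] as X'..' hex literals, Array[Boolean] as b'..' bit strings, and timestamps re-quoted in the current time zone) is gone; loading now goes through the db-random-test module's loadToDB(schemaAndData). Second, genIndex keeps its one substantive rule: string-typed key columns get a random prefix length, because TiDB, like MySQL, requires a prefix length when indexing TEXT/BLOB columns, while all other types are indexed on the full value. A condensed sketch of that rule, using only names visible in this diff (the standalone helper itself is hypothetical):

    import scala.util.Random

    // Hypothetical extraction of the per-column choice genIndex makes:
    // string-typed columns become prefix index columns with a random prefix
    // length in [2, 5] (r.nextInt(4) + 2); everything else is indexed whole.
    // Index column ordinals are 1-based, hence i + 1.
    def keyColumnFor(
        dataTypesWithDesc: List[(ReflectedDataType, String, String)],
        i: Int,
        r: Random): IndexColumn =
      if (isStringType(dataTypesWithDesc(i)._1)) {
        PrefixColumn(i + 1, r.nextInt(4) + 2)
      } else {
        DefaultColumn(i + 1)
      }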
(IndexScan0Suite; file path not shown in this capture)

@@ -16,10 +16,10 @@
 package org.apache.spark.sql.clustered

 import com.pingcap.tispark.TiConfigConst
-import org.apache.spark.sql.test.generator.DataType.INT
+import com.pingcap.tispark.test.generator.DataType.INT
+import com.pingcap.tispark.test.generator.NullableType

 class IndexScan0Suite extends ClusteredIndexTest {
-
   override def beforeAll(): Unit = {
     super.beforeAll()
     spark.sqlContext.setConf(TiConfigConst.USE_INDEX_SCAN_FIRST, "true")
@@ -34,10 +34,16 @@ class IndexScan0Suite extends ClusteredIndexTest {
     if (!supportClusteredIndex) {
       cancel("currently tidb instance does not support clustered index")
     }
-    for (dataType1 <- testDataTypes) {
-      for (dataType2 <- testDataTypes) {
-        val schemas = genSchema(List(dataType2, dataType1, INT), tablePrefix)
-        schemas.foreach { schema =>
+    for (dataType1 <- testDataTypes1) {
+      for (dataType2 <- testDataTypes2) {
+        val schemaAndDataList = genSchemaAndData(
+          rowCount,
+          List(dataType2, dataType1, INT).map(d =>
+            genDescription(d, NullableType.NumericNotNullable)),
+          database,
+          isClusteredIndex = true,
+          hasTiFlashReplica = enableTiFlashTest)
+        schemaAndDataList.foreach { schema =>
           test(schema)
         }
       }
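The suites no longer call genSchema with a table prefix: they wrap each column type via genDescription and get fully materialized schemas plus generated rows back from genSchemaAndData, optionally tagged for clustered-index DDL and TiFlash replicas. A sketch of that API's shape as reconstructed from the call sites here and in ClusteredIndexTest; every signature below is an inference for illustration, not the actual db-random-test declarations:

    // Inferred shapes only; the real declarations live in db-random-test.
    trait RandomTestHarnessSketch {
      // The tuple layout (data type, description, description) is taken from
      // genIndex's dataTypesWithDesc parameter; the meaning of the two string
      // fields is an assumption.
      def genDescription(
          dataType: ReflectedDataType,
          nullableType: NullableType): (ReflectedDataType, String, String)

      // Enumerates candidate schemas over the described columns and generates
      // rowCount random rows for each.
      def genSchemaAndData(
          rowCount: Int,
          dataTypesWithDesc: List[(ReflectedDataType, String, String)],
          database: String,
          isClusteredIndex: Boolean,
          hasTiFlashReplica: Boolean): List[SchemaAndData]
    }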
(IndexScan1Suite; file path not shown in this capture)

@@ -16,10 +16,10 @@
 package org.apache.spark.sql.clustered

 import com.pingcap.tispark.TiConfigConst
-import org.apache.spark.sql.test.generator.DataType.INT
+import com.pingcap.tispark.test.generator.DataType.INT
+import com.pingcap.tispark.test.generator.NullableType

 class IndexScan1Suite extends ClusteredIndexTest {
-
   override def beforeAll(): Unit = {
     super.beforeAll()
     spark.sqlContext.setConf(TiConfigConst.USE_INDEX_SCAN_FIRST, "true")
@@ -30,14 +30,20 @@ class IndexScan1Suite extends ClusteredIndexTest {
     super.afterAll()
   }

-  test("index scan 1: primary key has two columns") {
+  test("index scan 0: primary key has two columns") {
     if (!supportClusteredIndex) {
       cancel("currently tidb instance does not support clustered index")
     }
-    for (dataType1 <- testDataTypes) {
-      for (dataType2 <- testDataTypes) {
-        val schemas = genSchema(List(dataType2, dataType1, INT, INT), tablePrefix)
-        schemas.foreach { schema =>
+    for (dataType1 <- testDataTypes2) {
+      for (dataType2 <- testDataTypes1) {
+        val schemaAndDataList = genSchemaAndData(
+          rowCount,
+          List(dataType2, dataType1, INT, INT).map(d =>
+            genDescription(d, NullableType.NumericNotNullable)),
+          database,
+          isClusteredIndex = true,
+          hasTiFlashReplica = enableTiFlashTest)
+        schemaAndDataList.foreach { schema =>
           test(schema)
         }
       }
(TableScan0Suite; file path not shown in this capture)

@@ -15,17 +15,24 @@

 package org.apache.spark.sql.clustered

-import org.apache.spark.sql.test.generator.DataType.INT
+import com.pingcap.tispark.test.generator.DataType.INT
+import com.pingcap.tispark.test.generator.NullableType

 class TableScan0Suite extends ClusteredIndexTest {
   test("table scan 0: primary key has one column") {
     if (!supportClusteredIndex) {
       cancel("currently tidb instance does not support clustered index")
     }
-    for (dataType1 <- testDataTypes) {
-      for (dataType2 <- testDataTypes) {
-        val schemas = genSchema(List(dataType2, dataType1, INT), tablePrefix)
-        schemas.foreach { schema =>
+    for (dataType1 <- testDataTypes1) {
+      for (dataType2 <- testDataTypes2) {
+        val schemaAndDataList = genSchemaAndData(
+          rowCount,
+          List(dataType2, dataType1, INT).map(d =>
+            genDescription(d, NullableType.NumericNotNullable)),
+          database,
+          isClusteredIndex = true,
+          hasTiFlashReplica = enableTiFlashTest)
+        schemaAndDataList.foreach { schema =>
           test(schema)
         }
       }
(TableScan1Suite; file path not shown in this capture)

@@ -15,17 +15,24 @@

 package org.apache.spark.sql.clustered

-import org.apache.spark.sql.test.generator.DataType.INT
+import com.pingcap.tispark.test.generator.DataType.INT
+import com.pingcap.tispark.test.generator.NullableType

 class TableScan1Suite extends ClusteredIndexTest {
-  test("table scan 1: primary key has two columns") {
+  test("table scan 0: primary key has two columns") {
     if (!supportClusteredIndex) {
       cancel("currently tidb instance does not support clustered index")
     }
-    for (dataType1 <- testDataTypes) {
-      for (dataType2 <- testDataTypes) {
-        val schemas = genSchema(List(dataType2, dataType1, INT, INT), tablePrefix)
-        schemas.foreach { schema =>
+    for (dataType1 <- testDataTypes2) {
+      for (dataType2 <- testDataTypes1) {
+        val schemaAndDataList = genSchemaAndData(
+          rowCount,
+          List(dataType2, dataType1, INT, INT).map(d =>
+            genDescription(d, NullableType.NumericNotNullable)),
+          database,
+          isClusteredIndex = true,
+          hasTiFlashReplica = enableTiFlashTest)
+        schemaAndDataList.foreach { schema =>
           test(schema)
         }
       }
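All four suites drive the same loop; the only variation is which type list feeds which loop variable and how many trailing INT columns pad the schema. In idiomatic Scala the nesting could be flattened into a single for-comprehension. The sketch below is behaviorally equivalent to the loops above, with every name taken from the diff:

    for {
      dataType1 <- testDataTypes2
      dataType2 <- testDataTypes1
      schemaAndData <- genSchemaAndData(
        rowCount,
        List(dataType2, dataType1, INT, INT).map(d =>
          genDescription(d, NullableType.NumericNotNullable)),
        database,
        isClusteredIndex = true,
        hasTiFlashReplica = enableTiFlashTest)
    } test(schemaAndData)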
(The remaining changed files are not included in this capture.)