Skip to content

Commit

Permalink
Use JSONUtils.castStringsToIntegers
Browse files Browse the repository at this point in the history
Signed-off-by: Nghia Truong <[email protected]>
  • Loading branch information
ttnghia committed Oct 18, 2024
1 parent 01724df commit 8ec6474
Showing 1 changed file with 6 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,33 +63,6 @@ object GpuJsonReadCommon {
}


/**
 * Replaces every string row that looks like a floating point literal with null, leaving
 * all other rows as they are.
 *
 * An integer cannot look like a float, so any row containing `.`, `e` or `E` is rejected
 * here. The remaining validation of the integer text is left to CUDF itself.
 *
 * @param input the string column to sanitize; it is only read, never closed, by this method
 * @return a new column that the caller is responsible for closing
 */
private def sanitizeInts(input: ColumnView): ColumnVector = {
  // Builds a boolean column flagging the rows of `input` that contain `literal`. The
  // scalar holding the literal is released as soon as the check has been issued.
  def containsLiteral(literal: String): ColumnVector =
    withResource(Scalar.fromString(literal)) { needle =>
      input.stringContains(needle)
    }

  val hasDotOrLowerE = withResource(containsLiteral(".")) { hasDot =>
    withResource(containsLiteral("e")) { hasLowerE =>
      hasDot.or(hasLowerE)
    }
  }

  // Fold the upper case exponent marker into the mask; the partial mask is closed once
  // the combined one exists.
  val looksLikeFloat = withResource(hasDotOrLowerE) { _ =>
    withResource(containsLiteral("E")) { hasUpperE =>
      hasDotOrLowerE.or(hasUpperE)
    }
  }

  // Rows flagged by the mask become null, every other row keeps its original string.
  withResource(looksLikeFloat) { _ =>
    withResource(Scalar.fromNull(DType.STRING)) { nullString =>
      looksLikeFloat.ifElse(nullString, input)
    }
  }
}

private def castStringToFloat(cv: ColumnView, dt: DType,
options: JSONOptions): ColumnVector = {
// TableDebug.get().debug("input", cv)
Expand Down Expand Up @@ -206,12 +179,15 @@ object GpuJsonReadCommon {
//
//

//
// DONE
case (cv, Some(dt))
if (dt == ByteType || dt == ShortType || dt == IntegerType || dt == LongType ) &&
cv.getType == DType.STRING =>
withResource(sanitizeInts(cv)) { tmp =>
CastStrings.toInteger(tmp, false, GpuColumnVector.getNonNestedRapidsType(dt))
}
JSONUtils.castStringsToIntegers(cv, GpuColumnVector.getNonNestedRapidsType(dt))
//
//

case (cv, Some(dt)) if cv.getType == DType.STRING =>
GpuCast.doCast(cv, StringType, dt)
}
Expand Down

0 comments on commit 8ec6474

Please sign in to comment.