[SPARK-31578][R] Vectorize schema validation for arrow in types.R
### What changes were proposed in this pull request?

Repeated `sapply` avoided in internal `checkSchemaInArrow`

### Why are the changes needed?

Current implementation is doubly inefficient:

1. Repeatedly doing the same (95%) `sapply` loop
2. Doing scalar `==` on a vector (`==` should be done over the whole vector for efficiency)

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

By my trusty friend the CI bots

Closes #28372 from MichaelChirico/vectorize-types.

Authored-by: Michael Chirico <michael.chirico@grabtaxi.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
a68d98cf4f
commit
410fa91321
|
@ -94,27 +94,22 @@ checkSchemaInArrow <- function(schema) {
|
|||
}
|
||||
|
||||
# Both cases below produce a corrupt value for unknown reason. It needs to be investigated.
|
||||
if (any(sapply(schema$fields(), function(x) x$dataType.toString() == "FloatType"))) {
|
||||
field_strings <- sapply(schema$fields(), function(x) x$dataType.toString())
|
||||
if (any(field_strings == "FloatType")) {
|
||||
stop("Arrow optimization in R does not support float type yet.")
|
||||
}
|
||||
if (any(sapply(schema$fields(), function(x) x$dataType.toString() == "BinaryType"))) {
|
||||
if (any(field_strings == "BinaryType")) {
|
||||
stop("Arrow optimization in R does not support binary type yet.")
|
||||
}
|
||||
if (any(sapply(schema$fields(),
|
||||
function(x) startsWith(x$dataType.toString(),
|
||||
"ArrayType")))) {
|
||||
if (any(startsWith(field_strings, "ArrayType"))) {
|
||||
stop("Arrow optimization in R does not support array type yet.")
|
||||
}
|
||||
|
||||
# Arrow optimization in Spark does not yet support both cases below.
|
||||
if (any(sapply(schema$fields(),
|
||||
function(x) startsWith(x$dataType.toString(),
|
||||
"StructType")))) {
|
||||
if (any(startsWith(field_strings, "StructType"))) {
|
||||
stop("Arrow optimization in R does not support nested struct type yet.")
|
||||
}
|
||||
if (any(sapply(schema$fields(),
|
||||
function(x) startsWith(x$dataType.toString(),
|
||||
"MapType")))) {
|
||||
if (any(startsWith(field_strings, "MapType"))) {
|
||||
stop("Arrow optimization in R does not support map type yet.")
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue