[SPARK-10859] [SQL] fix stats of StringType in columnar cache
The UTF8String may come from an UnsafeRow, in which case its underlying buffer is not copied, so we should clone it in order to hold it in Stats. cc yhuai Author: Davies Liu <davies@databricks.com> Closes #8929 from davies/pushdown_string.
This commit is contained in:
parent
14978b785a
commit
ea02e5513a
|
@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats {
|
||||||
super.gatherStats(row, ordinal)
|
super.gatherStats(row, ordinal)
|
||||||
if (!row.isNullAt(ordinal)) {
|
if (!row.isNullAt(ordinal)) {
|
||||||
val value = row.getUTF8String(ordinal)
|
val value = row.getUTF8String(ordinal)
|
||||||
if (upper == null || value.compareTo(upper) > 0) upper = value
|
if (upper == null || value.compareTo(upper) > 0) upper = value.clone()
|
||||||
if (lower == null || value.compareTo(lower) < 0) lower = value
|
if (lower == null || value.compareTo(lower) < 0) lower = value.clone()
|
||||||
sizeInBytes += STRING.actualSize(row, ordinal)
|
sizeInBytes += STRING.actualSize(row, ordinal)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -212,4 +212,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
|
||||||
// Drop the cache.
|
// Drop the cache.
|
||||||
cached.unpersist()
|
cached.unpersist()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not evaluated correctly") {
|
||||||
|
val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s")
|
||||||
|
data.cache()
|
||||||
|
assert(data.count() === 10)
|
||||||
|
assert(data.filter($"s" === "3").count() === 1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue