[SPARK-10859] [SQL] fix stats of StringType in columnar cache

The UTF8String may come from an UnsafeRow, in which case its underlying buffer is not copied, so we should clone it in order to hold it in Stats.

cc yhuai

Author: Davies Liu <davies@databricks.com>

Closes #8929 from davies/pushdown_string.
This commit is contained in:
Davies Liu 2015-09-28 14:40:40 -07:00 committed by Yin Huai
parent 14978b785a
commit ea02e5513a
2 changed files with 9 additions and 2 deletions

View file

@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats {
super.gatherStats(row, ordinal) super.gatherStats(row, ordinal)
if (!row.isNullAt(ordinal)) { if (!row.isNullAt(ordinal)) {
val value = row.getUTF8String(ordinal) val value = row.getUTF8String(ordinal)
if (upper == null || value.compareTo(upper) > 0) upper = value if (upper == null || value.compareTo(upper) > 0) upper = value.clone()
if (lower == null || value.compareTo(lower) < 0) lower = value if (lower == null || value.compareTo(lower) < 0) lower = value.clone()
sizeInBytes += STRING.actualSize(row, ordinal) sizeInBytes += STRING.actualSize(row, ordinal)
} }
} }

View file

@ -212,4 +212,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
// Drop the cache. // Drop the cache.
cached.unpersist() cached.unpersist()
} }
// Regression test for SPARK-10859: builds a 10-row DataFrame with a string
// column `s` (the `id` column cast to string), caches it, and checks that an
// equality predicate on `s` still matches exactly one row.
test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not evaluated correctly") {
val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s")
data.cache()
// All 10 rows should be counted after cache() has been called.
assert(data.count() === 10)
// Exactly one row has s == "3"; the fix in this commit clones the string
// bounds kept in StringColumnStats so they do not share a row's buffer.
assert(data.filter($"s" === "3").count() === 1)
}
} }