[SPARK-29392][CORE][SQL][FOLLOWUP] Avoid deprecated (in 2.13) Symbol syntax 'foo in favor of simpler expressions where it generated deprecation warnings

### What changes were proposed in this pull request?

Where it generates a deprecation warning in Scala 2.13, replace Symbol shorthand syntax `'foo` with an equivalent.

### Why are the changes needed?

Symbol syntax `'foo` is deprecated in Scala 2.13. The lines changed below otherwise generate about 440 warnings when building for 2.13.

The previous PR replaced many usages directly with `Symbol("foo")`. But the Symbol syntax is also used to specify Columns via implicit conversion (`.select('foo)`), and even where plain Strings would do (`.as('foo)`), since a Symbol is essentially an abstraction over interned Strings.

While I find this syntax confusing and would like to deprecate it, here I replaced it only where it generates a build warning (it is unclear why not all occurrences do), using `$"foo"` or just `"foo"`.
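
For illustration, a minimal sketch of the styles involved (a hypothetical `key` column on a toy DataFrame; assumes `spark.implicits._` is in scope, as it is in the test suites below):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").appName("demo").getOrCreate()
import spark.implicits._

val df = Seq((1, "a"), (2, "b")).toDF("key", "value")

df.select('key)          // Symbol shorthand; generates a deprecation warning on Scala 2.13
df.select(Symbol("key")) // direct equivalent, used by the previous PR
df.select($"key")        // $-interpolator producing a Column
df.select("key")         // plain String overload of select
```

All four select the same column; this change prefers the last two forms where the first generated a warning.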

### Does this PR introduce any user-facing change?

Should not change behavior.

### How was this patch tested?

Existing tests.

Closes #26748 from srowen/SPARK-29392.2.

Authored-by: Sean Owen <sean.owen@databricks.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
Authored by Sean Owen on 2019-12-04 15:03:26 -08:00; committed by Dongjoon Hyun
commit 2ceed6f32c (parent a2102c81ee)
21 changed files with 525 additions and 520 deletions


@@ -59,9 +59,9 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest {
// Dataset for testing SparseVector
val toSparse: Vector => SparseVector = _.asInstanceOf[DenseVector].toSparse
val sparse = udf(toSparse)
smallSparseBinaryDataset = smallBinaryDataset.withColumn("features", sparse('features))
smallSparseValidationDataset = smallValidationDataset.withColumn("features", sparse('features))
smallSparseBinaryDataset = smallBinaryDataset.withColumn("features", sparse($"features"))
smallSparseValidationDataset =
smallValidationDataset.withColumn("features", sparse($"features"))
}
/**


@@ -161,14 +161,14 @@ class PowerIterationClusteringSuite extends SparkFunSuite
}
test("test default weight") {
val dataWithoutWeight = data.sample(0.5, 1L).select('src, 'dst)
val dataWithoutWeight = data.sample(0.5, 1L).select("src", "dst")
val assignments = new PowerIterationClustering()
.setK(2)
.setMaxIter(40)
.assignClusters(dataWithoutWeight)
val localAssignments = assignments
.select('id, 'cluster)
.select("id", "cluster")
.as[(Long, Int)].collect().toSet
val dataWithWeightOne = dataWithoutWeight.withColumn("weight", lit(1.0))
@@ -178,7 +178,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite
.setMaxIter(40)
.assignClusters(dataWithWeightOne)
val localAssignments2 = assignments2
.select('id, 'cluster)
.select("id", "cluster")
.as[(Long, Int)].collect().toSet
assert(localAssignments === localAssignments2)


@@ -59,12 +59,12 @@ class DistributionSuite extends SparkFunSuite {
true)
checkSatisfied(
HashPartitioning(Seq('a), 10),
HashPartitioning(Seq($"a"), 10),
UnspecifiedDistribution,
true)
checkSatisfied(
RangePartitioning(Seq('a.asc), 10),
RangePartitioning(Seq($"a".asc), 10),
UnspecifiedDistribution,
true)
@@ -101,22 +101,22 @@ class DistributionSuite extends SparkFunSuite {
true)
checkSatisfied(
HashPartitioning(Seq('a), 1),
HashPartitioning(Seq($"a"), 1),
AllTuples,
true)
checkSatisfied(
HashPartitioning(Seq('a), 10),
HashPartitioning(Seq($"a"), 10),
AllTuples,
false)
checkSatisfied(
RangePartitioning(Seq('a.asc), 1),
RangePartitioning(Seq($"a".asc), 1),
AllTuples,
true)
checkSatisfied(
RangePartitioning(Seq('a.asc), 10),
RangePartitioning(Seq($"a".asc), 10),
AllTuples,
false)
@@ -130,17 +130,17 @@ class DistributionSuite extends SparkFunSuite {
// SinglePartition can satisfy all the distributions except `BroadcastDistribution`
checkSatisfied(
SinglePartition,
ClusteredDistribution(Seq('a, 'b, 'c)),
ClusteredDistribution(Seq($"a", $"b", $"c")),
true)
checkSatisfied(
SinglePartition,
HashClusteredDistribution(Seq('a, 'b, 'c)),
HashClusteredDistribution(Seq($"a", $"b", $"c")),
true)
checkSatisfied(
SinglePartition,
OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
true)
checkSatisfied(
@@ -153,56 +153,56 @@ class DistributionSuite extends SparkFunSuite {
// HashPartitioning can satisfy ClusteredDistribution iff its hash expressions are a subset of
// the required clustering expressions.
checkSatisfied(
HashPartitioning(Seq('a, 'b, 'c), 10),
ClusteredDistribution(Seq('a, 'b, 'c)),
HashPartitioning(Seq($"a", $"b", $"c"), 10),
ClusteredDistribution(Seq($"a", $"b", $"c")),
true)
checkSatisfied(
HashPartitioning(Seq('b, 'c), 10),
ClusteredDistribution(Seq('a, 'b, 'c)),
HashPartitioning(Seq($"b", $"c"), 10),
ClusteredDistribution(Seq($"a", $"b", $"c")),
true)
checkSatisfied(
HashPartitioning(Seq('a, 'b, 'c), 10),
ClusteredDistribution(Seq('b, 'c)),
HashPartitioning(Seq($"a", $"b", $"c"), 10),
ClusteredDistribution(Seq($"b", $"c")),
false)
checkSatisfied(
HashPartitioning(Seq('a, 'b, 'c), 10),
ClusteredDistribution(Seq('d, 'e)),
HashPartitioning(Seq($"a", $"b", $"c"), 10),
ClusteredDistribution(Seq($"d", $"e")),
false)
// HashPartitioning can satisfy HashClusteredDistribution iff its hash expressions are exactly
// same with the required hash clustering expressions.
checkSatisfied(
HashPartitioning(Seq('a, 'b, 'c), 10),
HashClusteredDistribution(Seq('a, 'b, 'c)),
HashPartitioning(Seq($"a", $"b", $"c"), 10),
HashClusteredDistribution(Seq($"a", $"b", $"c")),
true)
checkSatisfied(
HashPartitioning(Seq('c, 'b, 'a), 10),
HashClusteredDistribution(Seq('a, 'b, 'c)),
HashPartitioning(Seq($"c", $"b", $"a"), 10),
HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)
checkSatisfied(
HashPartitioning(Seq('a, 'b), 10),
HashClusteredDistribution(Seq('a, 'b, 'c)),
HashPartitioning(Seq($"a", $"b"), 10),
HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)
// HashPartitioning cannot satisfy OrderedDistribution
checkSatisfied(
HashPartitioning(Seq('a, 'b, 'c), 10),
OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
HashPartitioning(Seq($"a", $"b", $"c"), 10),
OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false)
checkSatisfied(
HashPartitioning(Seq('a, 'b, 'c), 1),
OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
HashPartitioning(Seq($"a", $"b", $"c"), 1),
OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false) // TODO: this can be relaxed.
checkSatisfied(
HashPartitioning(Seq('b, 'c), 10),
OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
HashPartitioning(Seq($"b", $"c"), 10),
OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
false)
}
@@ -210,18 +210,18 @@ class DistributionSuite extends SparkFunSuite {
// RangePartitioning can satisfy OrderedDistribution iff its ordering is a prefix
// of the required ordering, or the required ordering is a prefix of its ordering.
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)),
true)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
OrderedDistribution(Seq('a.asc, 'b.asc)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
OrderedDistribution(Seq($"a".asc, $"b".asc)),
true)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
OrderedDistribution(Seq('a.asc, 'b.asc, 'c.asc, 'd.desc)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc, 'd.desc)),
true)
// TODO: We can have an optimization to first sort the dataset
@@ -229,78 +229,78 @@ class DistributionSuite extends SparkFunSuite {
// should tradeoff the benefit of a less number of Exchange operators
// and the parallelism.
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
OrderedDistribution(Seq('a.asc, 'b.desc, 'c.asc)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
OrderedDistribution(Seq($"a".asc, $"b".desc, $"c".asc)),
false)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
OrderedDistribution(Seq('b.asc, 'a.asc)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
OrderedDistribution(Seq($"b".asc, $"a".asc)),
false)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
OrderedDistribution(Seq('a.asc, 'b.asc, 'd.desc)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
OrderedDistribution(Seq($"a".asc, $"b".asc, 'd.desc)),
false)
// RangePartitioning can satisfy ClusteredDistribution iff its ordering expressions are a subset
// of the required clustering expressions.
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
ClusteredDistribution(Seq('a, 'b, 'c)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
ClusteredDistribution(Seq($"a", $"b", $"c")),
true)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
ClusteredDistribution(Seq('c, 'b, 'a)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
ClusteredDistribution(Seq($"c", $"b", $"a")),
true)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
ClusteredDistribution(Seq('b, 'c, 'a, 'd)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
ClusteredDistribution(Seq($"b", $"c", $"a", $"d")),
true)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
ClusteredDistribution(Seq('a, 'b)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
ClusteredDistribution(Seq($"a", $"b")),
false)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
ClusteredDistribution(Seq('c, 'd)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
ClusteredDistribution(Seq($"c", $"d")),
false)
// RangePartitioning cannot satisfy HashClusteredDistribution
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
HashClusteredDistribution(Seq('a, 'b, 'c)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
HashClusteredDistribution(Seq($"a", $"b", $"c")),
false)
}
test("Partitioning.numPartitions must match Distribution.requiredNumPartitions to satisfy it") {
checkSatisfied(
SinglePartition,
ClusteredDistribution(Seq('a, 'b, 'c), Some(10)),
ClusteredDistribution(Seq($"a", $"b", $"c"), Some(10)),
false)
checkSatisfied(
SinglePartition,
HashClusteredDistribution(Seq('a, 'b, 'c), Some(10)),
HashClusteredDistribution(Seq($"a", $"b", $"c"), Some(10)),
false)
checkSatisfied(
HashPartitioning(Seq('a, 'b, 'c), 10),
ClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
HashPartitioning(Seq($"a", $"b", $"c"), 10),
ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)
checkSatisfied(
HashPartitioning(Seq('a, 'b, 'c), 10),
HashClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
HashPartitioning(Seq($"a", $"b", $"c"), 10),
HashClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)
checkSatisfied(
RangePartitioning(Seq('a.asc, 'b.asc, 'c.asc), 10),
ClusteredDistribution(Seq('a, 'b, 'c), Some(5)),
RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10),
ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)),
false)
}
}


@@ -96,7 +96,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
test("cache temp table") {
withTempView("tempTable") {
testData.select('key).createOrReplaceTempView("tempTable")
testData.select("key").createOrReplaceTempView("tempTable")
assertCached(sql("SELECT COUNT(*) FROM tempTable"), 0)
spark.catalog.cacheTable("tempTable")
assertCached(sql("SELECT COUNT(*) FROM tempTable"))
@@ -127,8 +127,8 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
}
test("uncaching temp table") {
testData.select('key).createOrReplaceTempView("tempTable1")
testData.select('key).createOrReplaceTempView("tempTable2")
testData.select("key").createOrReplaceTempView("tempTable1")
testData.select("key").createOrReplaceTempView("tempTable2")
spark.catalog.cacheTable("tempTable1")
assertCached(sql("SELECT COUNT(*) FROM tempTable1"))
@@ -361,15 +361,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
}
test("Drops temporary table") {
testData.select('key).createOrReplaceTempView("t1")
testData.select("key").createOrReplaceTempView("t1")
spark.table("t1")
spark.catalog.dropTempView("t1")
intercept[AnalysisException](spark.table("t1"))
}
test("Drops cached temporary table") {
testData.select('key).createOrReplaceTempView("t1")
testData.select('key).createOrReplaceTempView("t2")
testData.select("key").createOrReplaceTempView("t1")
testData.select("key").createOrReplaceTempView("t2")
spark.catalog.cacheTable("t1")
assert(spark.catalog.isCached("t1"))
@@ -859,7 +859,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
test("SPARK-23880 table cache should be lazy and don't trigger any jobs") {
val cachedData = checkIfNoJobTriggered {
spark.range(1002).filter('id > 1000).orderBy('id.desc).cache()
spark.range(1002).filter($"id" > 1000).orderBy($"id".desc).cache()
}
assert(cachedData.collect === Seq(1001))
}
@@ -891,7 +891,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
test("SPARK-24596 Non-cascading Cache Invalidation - drop persistent view") {
withTable("t") {
spark.range(1, 10).toDF("key").withColumn("value", 'key * 2)
spark.range(1, 10).toDF("key").withColumn("value", $"key" * 2)
.write.format("json").saveAsTable("t")
withView("t1") {
withTempView("t2") {
@@ -911,7 +911,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi
test("SPARK-24596 Non-cascading Cache Invalidation - uncache table") {
withTable("t") {
spark.range(1, 10).toDF("key").withColumn("value", 'key * 2)
spark.range(1, 10).toDF("key").withColumn("value", $"key" * 2)
.write.format("json").saveAsTable("t")
withTempView("t1", "t2") {
sql("CACHE TABLE t")


@@ -537,12 +537,12 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
test("sqrt") {
checkAnswer(
testData.select(sqrt('key)).orderBy('key.asc),
testData.select(sqrt($"key")).orderBy($"key".asc),
(1 to 100).map(n => Row(math.sqrt(n)))
)
checkAnswer(
testData.select(sqrt('value), 'key).orderBy('key.asc, 'value.asc),
testData.select(sqrt($"value"), $"key").orderBy($"key".asc, $"value".asc),
(1 to 100).map(n => Row(math.sqrt(n), n))
)
@@ -554,12 +554,12 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
test("upper") {
checkAnswer(
lowerCaseData.select(upper('l)),
lowerCaseData.select(upper($"l")),
('a' to 'd').map(c => Row(c.toString.toUpperCase(Locale.ROOT)))
)
checkAnswer(
testData.select(upper('value), 'key),
testData.select(upper($"value"), $"key"),
(1 to 100).map(n => Row(n.toString, n))
)
@@ -575,12 +575,12 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
test("lower") {
checkAnswer(
upperCaseData.select(lower('L)),
upperCaseData.select(lower($"L")),
('A' to 'F').map(c => Row(c.toString.toLowerCase(Locale.ROOT)))
)
checkAnswer(
testData.select(lower('value), 'key),
testData.select(lower($"value"), $"key"),
(1 to 100).map(n => Row(n.toString, n))
)
@@ -753,8 +753,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
}
test("columns can be compared") {
assert('key.desc == 'key.desc)
assert('key.desc != 'key.asc)
assert($"key".desc == $"key".desc)
assert($"key".desc != $"key".asc)
}
test("alias with metadata") {
@@ -817,7 +817,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
}
test("randn") {
val randCol = testData.select('key, randn(5L).as("rand"))
val randCol = testData.select($"key", randn(5L).as("rand"))
randCol.columns.length should be (2)
val rows = randCol.collect()
rows.foreach { row =>


@@ -41,7 +41,7 @@ class ConfigBehaviorSuite extends QueryTest with SharedSparkSession {
// Trigger a sort
// Range has range partitioning in its output now. To have a range shuffle, we
// need to run a repartition first.
val data = spark.range(0, n, 1, 1).repartition(10).sort('id.desc)
val data = spark.range(0, n, 1, 1).repartition(10).sort($"id".desc)
.selectExpr("SPARK_PARTITION_ID() pid", "id").as[(Int, Long)].collect()
// Compute histogram for the number of records per partition post sort


@@ -153,7 +153,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
test("infers schemas of a CSV string and pass to to from_csv") {
val in = Seq("""0.123456789,987654321,"San Francisco"""").toDS()
val options = Map.empty[String, String].asJava
val out = in.select(from_csv('value, schema_of_csv("0.1,1,a"), options) as "parsed")
val out = in.select(from_csv($"value", schema_of_csv("0.1,1,a"), options) as "parsed")
val expected = StructType(Seq(StructField(
"parsed",
StructType(Seq(


@@ -45,7 +45,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
Seq(Row(1, 3), Row(2, 3), Row(3, 3))
)
checkAnswer(
testData2.groupBy("a").agg(sum($"b").as("totB")).agg(sum('totB)),
testData2.groupBy("a").agg(sum($"b").as("totB")).agg(sum($"totB")),
Row(9)
)
checkAnswer(
@@ -111,7 +111,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
val df = Seq(("some[thing]", "random-string")).toDF("key", "val")
checkAnswer(
df.groupBy(regexp_extract('key, "([a-z]+)\\[", 1)).count(),
df.groupBy(regexp_extract($"key", "([a-z]+)\\[", 1)).count(),
Row("some", 1) :: Nil
)
}
@@ -277,7 +277,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
test("agg without groups") {
checkAnswer(
testData2.agg(sum('b)),
testData2.agg(sum($"b")),
Row(9)
)
}
@@ -291,52 +291,53 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
test("average") {
checkAnswer(
testData2.agg(avg('a), mean('a)),
testData2.agg(avg($"a"), mean($"a")),
Row(2.0, 2.0))
checkAnswer(
testData2.agg(avg('a), sumDistinct('a)), // non-partial
testData2.agg(avg($"a"), sumDistinct($"a")), // non-partial
Row(2.0, 6.0) :: Nil)
checkAnswer(
decimalData.agg(avg('a)),
decimalData.agg(avg($"a")),
Row(new java.math.BigDecimal(2)))
checkAnswer(
decimalData.agg(avg('a), sumDistinct('a)), // non-partial
decimalData.agg(avg($"a"), sumDistinct($"a")), // non-partial
Row(new java.math.BigDecimal(2), new java.math.BigDecimal(6)) :: Nil)
checkAnswer(
decimalData.agg(avg('a cast DecimalType(10, 2))),
decimalData.agg(avg($"a" cast DecimalType(10, 2))),
Row(new java.math.BigDecimal(2)))
// non-partial
checkAnswer(
decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))),
decimalData.agg(
avg($"a" cast DecimalType(10, 2)), sumDistinct($"a" cast DecimalType(10, 2))),
Row(new java.math.BigDecimal(2), new java.math.BigDecimal(6)) :: Nil)
}
test("null average") {
checkAnswer(
testData3.agg(avg('b)),
testData3.agg(avg($"b")),
Row(2.0))
checkAnswer(
testData3.agg(avg('b), countDistinct('b)),
testData3.agg(avg($"b"), countDistinct($"b")),
Row(2.0, 1))
checkAnswer(
testData3.agg(avg('b), sumDistinct('b)), // non-partial
testData3.agg(avg($"b"), sumDistinct($"b")), // non-partial
Row(2.0, 2.0))
}
test("zero average") {
val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
checkAnswer(
emptyTableData.agg(avg('a)),
emptyTableData.agg(avg($"a")),
Row(null))
checkAnswer(
emptyTableData.agg(avg('a), sumDistinct('b)), // non-partial
emptyTableData.agg(avg($"a"), sumDistinct($"b")), // non-partial
Row(null, null))
}
@@ -344,28 +345,29 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
assert(testData2.count() === testData2.rdd.map(_ => 1).count())
checkAnswer(
testData2.agg(count('a), sumDistinct('a)), // non-partial
testData2.agg(count($"a"), sumDistinct($"a")), // non-partial
Row(6, 6.0))
}
test("null count") {
checkAnswer(
testData3.groupBy('a).agg(count('b)),
testData3.groupBy($"a").agg(count($"b")),
Seq(Row(1, 0), Row(2, 1))
)
checkAnswer(
testData3.groupBy('a).agg(count('a + 'b)),
testData3.groupBy($"a").agg(count($"a" + $"b")),
Seq(Row(1, 0), Row(2, 1))
)
checkAnswer(
testData3.agg(count('a), count('b), count(lit(1)), countDistinct('a), countDistinct('b)),
testData3.agg(
count($"a"), count($"b"), count(lit(1)), countDistinct($"a"), countDistinct($"b")),
Row(2, 1, 2, 2, 1)
)
checkAnswer(
testData3.agg(count('b), countDistinct('b), sumDistinct('b)), // non-partial
testData3.agg(count($"b"), countDistinct($"b"), sumDistinct($"b")), // non-partial
Row(1, 1, 2)
)
}
@@ -380,17 +382,17 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
.toDF("key1", "key2", "key3")
checkAnswer(
df1.agg(countDistinct('key1, 'key2)),
df1.agg(countDistinct($"key1", $"key2")),
Row(3)
)
checkAnswer(
df1.agg(countDistinct('key1, 'key2, 'key3)),
df1.agg(countDistinct($"key1", $"key2", $"key3")),
Row(3)
)
checkAnswer(
df1.groupBy('key1).agg(countDistinct('key2, 'key3)),
df1.groupBy($"key1").agg(countDistinct($"key2", $"key3")),
Seq(Row("a", 2), Row("x", 1))
)
}
@@ -398,14 +400,14 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
test("zero count") {
val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
checkAnswer(
emptyTableData.agg(count('a), sumDistinct('a)), // non-partial
emptyTableData.agg(count($"a"), sumDistinct($"a")), // non-partial
Row(0, null))
}
test("stddev") {
val testData2ADev = math.sqrt(4.0 / 5.0)
checkAnswer(
testData2.agg(stddev('a), stddev_pop('a), stddev_samp('a)),
testData2.agg(stddev($"a"), stddev_pop($"a"), stddev_samp($"a")),
Row(testData2ADev, math.sqrt(4 / 6.0), testData2ADev))
checkAnswer(
testData2.agg(stddev("a"), stddev_pop("a"), stddev_samp("a")),
@@ -415,47 +417,47 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
test("zero stddev") {
val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
checkAnswer(
emptyTableData.agg(stddev('a), stddev_pop('a), stddev_samp('a)),
emptyTableData.agg(stddev($"a"), stddev_pop($"a"), stddev_samp($"a")),
Row(null, null, null))
}
test("zero sum") {
val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
checkAnswer(
emptyTableData.agg(sum('a)),
emptyTableData.agg(sum($"a")),
Row(null))
}
test("zero sum distinct") {
val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
checkAnswer(
emptyTableData.agg(sumDistinct('a)),
emptyTableData.agg(sumDistinct($"a")),
Row(null))
}
test("moments") {
val sparkVariance = testData2.agg(variance('a))
val sparkVariance = testData2.agg(variance($"a"))
checkAggregatesWithTol(sparkVariance, Row(4.0 / 5.0), absTol)
val sparkVariancePop = testData2.agg(var_pop('a))
val sparkVariancePop = testData2.agg(var_pop($"a"))
checkAggregatesWithTol(sparkVariancePop, Row(4.0 / 6.0), absTol)
val sparkVarianceSamp = testData2.agg(var_samp('a))
val sparkVarianceSamp = testData2.agg(var_samp($"a"))
checkAggregatesWithTol(sparkVarianceSamp, Row(4.0 / 5.0), absTol)
val sparkSkewness = testData2.agg(skewness('a))
val sparkSkewness = testData2.agg(skewness($"a"))
checkAggregatesWithTol(sparkSkewness, Row(0.0), absTol)
val sparkKurtosis = testData2.agg(kurtosis('a))
val sparkKurtosis = testData2.agg(kurtosis($"a"))
checkAggregatesWithTol(sparkKurtosis, Row(-1.5), absTol)
}
test("zero moments") {
val input = Seq((1, 2)).toDF("a", "b")
checkAnswer(
input.agg(stddev('a), stddev_samp('a), stddev_pop('a), variance('a),
var_samp('a), var_pop('a), skewness('a), kurtosis('a)),
input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"),
var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")),
Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0,
Double.NaN, Double.NaN))
@@ -475,8 +477,8 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
test("null moments") {
val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
checkAnswer(
emptyTableData.agg(variance('a), var_samp('a), var_pop('a), skewness('a), kurtosis('a)),
checkAnswer(emptyTableData.agg(
variance($"a"), var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")),
Row(null, null, null, null, null))
checkAnswer(
@@ -566,7 +568,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
test("SQL decimal test (used for catching certain decimal handling bugs in aggregates)") {
checkAnswer(
decimalData.groupBy('a cast DecimalType(10, 2)).agg(avg('b cast DecimalType(10, 2))),
decimalData.groupBy($"a" cast DecimalType(10, 2)).agg(avg($"b" cast DecimalType(10, 2))),
Seq(Row(new java.math.BigDecimal(1), new java.math.BigDecimal("1.5")),
Row(new java.math.BigDecimal(2), new java.math.BigDecimal("1.5")),
Row(new java.math.BigDecimal(3), new java.math.BigDecimal("1.5"))))
@@ -653,7 +655,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
testData2.groupBy(lit(3), lit(4)).agg(lit(6), lit(7), sum("b")),
Seq(Row(3, 4, 6, 7, 9)))
checkAnswer(
testData2.groupBy(lit(3), lit(4)).agg(lit(6), 'b, sum("b")),
testData2.groupBy(lit(3), lit(4)).agg(lit(6), $"b", sum("b")),
Seq(Row(3, 4, 6, 1, 3), Row(3, 4, 6, 2, 6)))
checkAnswer(
@@ -716,14 +718,14 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
assert(thrownException.message.contains("not allowed to use a window function"))
}
checkWindowError(testData2.select(min(avg('b).over(Window.partitionBy('a)))))
checkWindowError(testData2.agg(sum('b), max(rank().over(Window.orderBy('a)))))
checkWindowError(testData2.groupBy('a).agg(sum('b), max(rank().over(Window.orderBy('b)))))
checkWindowError(testData2.groupBy('a).agg(max(sum(sum('b)).over(Window.orderBy('a)))))
checkWindowError(
testData2.groupBy('a).agg(sum('b).as("s"), max(count("*").over())).where('s === 3))
checkAnswer(
testData2.groupBy('a).agg(max('b), sum('b).as("s"), count("*").over()).where('s === 3),
checkWindowError(testData2.select(min(avg($"b").over(Window.partitionBy($"a")))))
checkWindowError(testData2.agg(sum($"b"), max(rank().over(Window.orderBy($"a")))))
checkWindowError(testData2.groupBy($"a").agg(sum($"b"), max(rank().over(Window.orderBy($"b")))))
checkWindowError(testData2.groupBy($"a").agg(max(sum(sum($"b")).over(Window.orderBy($"a")))))
checkWindowError(testData2.groupBy($"a").agg(
sum($"b").as("s"), max(count("*").over())).where($"s" === 3))
checkAnswer(testData2.groupBy($"a").agg(
max($"b"), sum($"b").as("s"), count("*").over()).where($"s" === 3),
Row(1, 2, 3, 3) :: Row(2, 2, 3, 3) :: Row(3, 2, 3, 3) :: Nil)
checkWindowError(sql("SELECT MIN(AVG(b) OVER(PARTITION BY a)) FROM testData2"))
@@ -739,7 +741,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
test("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") {
// Checks if these raise no exception
assert(testData.groupBy('key).toString.contains(
assert(testData.groupBy($"key").toString.contains(
"[grouping expressions: [key], value: [key: int, value: string], type: GroupBy]"))
assert(testData.groupBy(col("key")).toString.contains(
"[grouping expressions: [key], value: [key: int, value: string], type: GroupBy]"))
@@ -954,10 +956,10 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession {
test("calendar interval agg support hash aggregate") {
val df1 = Seq((1, "1 day"), (2, "2 day"), (3, "3 day"), (3, null)).toDF("a", "b")
val df2 = df1.select(avg('b cast CalendarIntervalType))
val df2 = df1.select(avg($"b" cast CalendarIntervalType))
checkAnswer(df2, Row(new CalendarInterval(0, 2, 0)) :: Nil)
assert(df2.queryExecution.executedPlan.find(_.isInstanceOf[HashAggregateExec]).isDefined)
val df3 = df1.groupBy('a).agg(avg('b cast CalendarIntervalType))
val df3 = df1.groupBy($"a").agg(avg($"b" cast CalendarIntervalType))
checkAnswer(df3,
Row(1, new CalendarInterval(0, 1, 0)) ::
Row(2, new CalendarInterval(0, 2, 0)) ::


@@ -87,154 +87,154 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
test("filter pushdown - integer") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - long") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - float") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - double") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - string") {
withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === "1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === "1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < "2", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= "4", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < "2", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= "4", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("1") === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal("1") <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal("2") > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("3") < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("1") >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("4") <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - boolean") {
withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === true, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === true, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < true, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= false, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < true, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= false, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(false) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(false) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(false) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(true) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(true) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(true) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(false) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(false) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(false) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(true) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(true) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(true) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - decimal") {
withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) === '_1, PredicateLeaf.Operator.EQUALS)
Literal(BigDecimal.valueOf(1)) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
Literal(BigDecimal.valueOf(1)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(2)) > '_1, PredicateLeaf.Operator.LESS_THAN)
Literal(BigDecimal.valueOf(2)) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(
Literal(BigDecimal.valueOf(3)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
Literal(BigDecimal.valueOf(3)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
Literal(BigDecimal.valueOf(1)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(4)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
Literal(BigDecimal.valueOf(4)) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
@@ -245,46 +245,46 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
new Timestamp(milliseconds)
}
withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === timestamps(0), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < timestamps(1), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= timestamps(3), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(timestamps(0)) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(timestamps(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(timestamps(1)) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(timestamps(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(timestamps(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(timestamps(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(timestamps(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(timestamps(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(timestamps(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - combinations with logical operators") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
checkFilterPredicate(
'_1.isNotNull,
$"_1".isNotNull,
"leaf-0 = (IS_NULL _1), expr = (not leaf-0)"
)
checkFilterPredicate(
'_1 =!= 1,
$"_1" =!= 1,
"leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))"
)
checkFilterPredicate(
!('_1 < 4),
!($"_1" < 4),
"leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))"
)
checkFilterPredicate(
'_1 < 2 || '_1 > 3,
$"_1" < 2 || $"_1" > 3,
"leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " +
"expr = (or leaf-0 (not leaf-1))"
)
checkFilterPredicate(
'_1 < 2 && '_1 > 3,
$"_1" < 2 && $"_1" > 3,
"leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), leaf-2 = (LESS_THAN_EQUALS _1 3), " +
"expr = (and (not leaf-0) leaf-1 (not leaf-2))"
)
@@ -296,22 +296,22 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
Date.valueOf(day)
}
withOrcDataFrame(dates.map(Tuple1(_))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === dates(0), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === dates(0), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < dates(1), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= dates(3), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < dates(1), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= dates(3), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(dates(0)) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(dates(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(dates(1)) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(dates(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(dates(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(dates(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(dates(0)) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(dates(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(dates(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(dates(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(dates(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(dates(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
@@ -321,15 +321,15 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
}
// ArrayType
withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df =>
checkNoFilterPredicate('_1.isNull, noneSupported = true)
checkNoFilterPredicate($"_1".isNull, noneSupported = true)
}
// BinaryType
withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
checkNoFilterPredicate('_1 <=> 1.b, noneSupported = true)
checkNoFilterPredicate($"_1" <=> 1.b, noneSupported = true)
}
// MapType
withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df =>
checkNoFilterPredicate('_1.isNotNull, noneSupported = true)
checkNoFilterPredicate($"_1".isNotNull, noneSupported = true)
}
}


@@ -88,154 +88,154 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
test("filter pushdown - integer") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - long") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - float") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - double") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - string") {
withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df =>
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate('_1 === "1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate('_1 <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" === "1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < "2", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= "4", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("1") === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal("1") <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal("2") > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("3") < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("1") >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("4") <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - boolean") {
withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === true, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < true, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= false, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(false) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(false) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(false) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(true) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(true) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(true) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - decimal") {
withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(2)) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(
Literal(BigDecimal.valueOf(3)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(4)) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
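The replacement throughout these suites is mechanical: with `spark.implicits._` in scope, all three spellings below produce the same `Column`, and only the first trips the 2.13 deprecation warning. A sketch reusing the implicit `df` from the test above:

```scala
import org.apache.spark.sql.functions.col

df.filter('_1 <=> BigDecimal.valueOf(1))      // deprecated Symbol shorthand
df.filter($"_1" <=> BigDecimal.valueOf(1))    // string interpolator, used here
df.filter(col("_1") <=> BigDecimal.valueOf(1))
```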
@ -246,46 +246,47 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
new Timestamp(milliseconds)
}
withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(
Literal(timestamps(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(timestamps(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(timestamps(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - combinations with logical operators") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
checkFilterPredicate(
$"_1".isNotNull,
"leaf-0 = (IS_NULL _1), expr = (not leaf-0)"
)
checkFilterPredicate(
$"_1" =!= 1,
"leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))"
)
checkFilterPredicate(
!($"_1" < 4),
"leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))"
)
checkFilterPredicate(
$"_1" < 2 || $"_1" > 3,
"leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " +
"expr = (or leaf-0 (not leaf-1))"
)
checkFilterPredicate(
$"_1" < 2 && $"_1" > 3,
"leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), leaf-2 = (LESS_THAN_EQUALS _1 3), " +
"expr = (and (not leaf-0) leaf-1 (not leaf-2))"
)
@ -297,22 +298,22 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
Date.valueOf(day)
}
withOrcDataFrame(dates.map(Tuple1(_))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === dates(0), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < dates(1), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= dates(3), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(dates(0)) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(dates(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(dates(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(dates(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(dates(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(dates(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
@ -322,15 +323,15 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
}
// ArrayType
withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df =>
checkNoFilterPredicate($"_1".isNull, noneSupported = true)
}
// BinaryType
withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
checkNoFilterPredicate($"_1" <=> 1.b, noneSupported = true)
}
// MapType
withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df =>
checkNoFilterPredicate($"_1".isNotNull, noneSupported = true)
}
}
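The three cases above exercise types with no `PredicateLeaf.Type` counterpart, so no ORC filter is built for them and `checkNoFilterPredicate` asserts exactly that. A hedged sketch of the shape of that type check (hypothetical helper, not the actual builder code):

```scala
import org.apache.spark.sql.types._

def hasOrcLeafType(dt: DataType): Boolean = dt match {
  case ByteType | ShortType | IntegerType | LongType => true  // LONG leaf
  case FloatType | DoubleType => true                         // FLOAT leaf
  case StringType => true                                     // STRING leaf
  case BooleanType => true                                    // BOOLEAN leaf
  case _: DecimalType => true                                 // DECIMAL leaf
  case DateType | TimestampType => true                       // DATE/TIMESTAMP leaves
  case _ => false  // ArrayType, MapType, BinaryType, StructType, ...
}
```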
@ -46,7 +46,7 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils {
test("duplicated metastore relations") {
val df = spark.sql("SELECT * FROM src")
logInfo(df.queryExecution.toString)
df.as("a").join(df.as("b"), $"a.key" === $"b.key")
}
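`Dataset.as` accepts a plain `String`, so the Symbol added nothing here; the alias is then referenced through the `$` interpolator. Usage sketch (assuming `src`'s usual `(key, value)` schema):

```scala
val joined = df.as("a").join(df.as("b"), $"a.key" === $"b.key")
joined.select($"a.key", $"b.value")
```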
test("should not truncate struct type catalog string") {
@ -142,8 +142,8 @@ class DataSourceWithHiveMetastoreCatalogSuite
import testImplicits._
private val testDF = range(1, 3).select(
($"id" + 0.1) cast DecimalType(10, 3) as "d1",
$"id" cast StringType as "d2"
).coalesce(1)
override def beforeAll(): Unit = {
@ -212,7 +212,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
val filePath2 = new File(tempDir, "testParquet2").getCanonicalPath
val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str")
val df2 = df.as("x").join(df.as("y"), $"x.str" === $"y.str").groupBy("y.str").max("y.int")
intercept[Throwable](df2.write.parquet(filePath))
val df3 = df2.toDF("str", "max_int")
@ -700,7 +700,7 @@ object SPARK_9757 extends QueryTest {
val df =
hiveContext
.range(10)
.select(($"id" + 0.1) cast DecimalType(10, 3) as "dec")
df.write.option("path", dir.getCanonicalPath).mode("overwrite").saveAsTable("t")
checkAnswer(hiveContext.table("t"), df)
}
@ -709,7 +709,7 @@ object SPARK_9757 extends QueryTest {
val df =
hiveContext
.range(10)
.select(callUDF("struct", ('id + 0.2) cast DecimalType(10, 3)) as 'dec_struct)
.select(callUDF("struct", ($"id" + 0.2) cast DecimalType(10, 3)) as "dec_struct")
df.write.option("path", dir.getCanonicalPath).mode("overwrite").saveAsTable("t")
checkAnswer(hiveContext.table("t"), df)
}
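Both blocks above chain the infix `cast` and `as` Column methods, and the `String` alias is a drop-in for the old Symbol one. A standalone sketch (assumes a `SparkSession` with implicits in scope):

```scala
import org.apache.spark.sql.types.DecimalType

val dec = spark.range(10)
  .select(($"id" + 0.1) cast DecimalType(10, 3) as "dec")
dec.printSchema()  // dec: decimal(10,3)
```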
@ -771,8 +771,8 @@ object SPARK_14244 extends QueryTest {
import hiveContext.implicits._
try {
val window = Window.orderBy("id")
val df = spark.range(2).select(cume_dist().over(window).as("cdist")).orderBy("cdist")
checkAnswer(df, Seq(Row(0.5D), Row(1.0D)))
} finally {
sparkContext.stop()
@ -461,7 +461,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter
// Columns `c + 1` and `d + 1` are resolved by position, and thus mapped to partition
// columns `b` and `c` of the target table.
val df = Seq((1, 2, 3, 4)).toDF("a", "b", "c", "d")
df.select($"a" + 1, $"b" + 1, $"c" + 1, $"d" + 1).write.insertInto(tableName)
checkAnswer(
sql(s"SELECT a, b, c, d FROM $tableName"),
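As the comment in the hunk above notes, `insertInto` matches the query's output to the target table by position, not by name. A hypothetical sketch of why `c + 1` and `d + 1` land in partition columns (the table layout is inferred from that comment, not taken from the suite):

```scala
// Assumed target: data columns (a, d), partition columns (b, c),
// so the positional output order is (a, d, b, c).
sql("CREATE TABLE t (a INT, d INT) PARTITIONED BY (b INT, c INT)")
Seq((1, 2, 3, 4)).toDF("a", "b", "c", "d")
  .select($"a" + 1, $"b" + 1, $"c" + 1, $"d" + 1)
  .write.insertInto("t")  // a+1 -> a, b+1 -> d, c+1 -> b, d+1 -> c
```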
@ -1253,7 +1253,7 @@ class HiveDDLSuite
val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
withTempView(sourceViewName) {
withTable(targetTabName) {
spark.range(10).select($"id" as "a", $"id" as "b", $"id" as "c", $"id" as "d")
.createTempView(sourceViewName)
val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
@ -1287,7 +1287,7 @@ class HiveDDLSuite
val targetTabName = "tab2"
val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
withTable(sourceTabName, targetTabName) {
spark.range(10).select($"id" as "a", $"id" as "b", $"id" as "c", $"id" as "d")
.write.format("json").saveAsTable(sourceTabName)
val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
@ -1327,7 +1327,7 @@ class HiveDDLSuite
withTable(sourceTabName, targetTabName) {
withTempPath { dir =>
val path = dir.getCanonicalPath
spark.range(10).select($"id" as "a", $"id" as "b", $"id" as "c", $"id" as "d")
.write.format("parquet").save(path)
sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')")
@ -1456,7 +1456,7 @@ class HiveDDLSuite
val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
withTable(sourceTabName, targetTabName) {
withView(sourceViewName) {
spark.range(10).select($"id" as "a", $"id" as "b", $"id" as "c", $"id" as "d")
.write.format("json").saveAsTable(sourceTabName)
sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName")
@ -2456,12 +2456,12 @@ class HiveDDLSuite
test("SPARK-22252: FileFormatWriter should respect the input query schema in HIVE") {
withTable("t1", "t2", "t3", "t4") {
spark.range(1).select($"id" as "col1", $"id" as "col2").write.saveAsTable("t1")
spark.sql("select COL1, COL2 from t1").write.format("hive").saveAsTable("t2")
checkAnswer(spark.table("t2"), Row(0, 0))
// Test picking part of the columns when writing.
spark.range(1).select($"id", $"id" as "col1", $"id" as "col2").write.saveAsTable("t3")
spark.sql("select COL1, COL2 from t3").write.format("hive").saveAsTable("t4")
checkAnswer(spark.table("t4"), Row(0, 0))
}
@ -711,7 +711,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
}
def isExplanation(result: DataFrame): Boolean = {
val explanation = result.select("plan").collect().map { case Row(plan: String) => plan }
explanation.head.startsWith("== Physical Plan ==")
}
@ -85,8 +85,8 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH
sql("create table spark_4959 (col1 string)")
sql("""insert into table spark_4959 select "hi" from src limit 1""")
table("spark_4959").select(
'col1.as("CaseSensitiveColName"),
'col1.as("CaseSensitiveColName2")).createOrReplaceTempView("spark_4959_2")
$"col1".as("CaseSensitiveColName"),
$"col1".as("CaseSensitiveColName2")).createOrReplaceTempView("spark_4959_2")
assert(sql("select CaseSensitiveColName from spark_4959_2").head() === Row("hi"))
assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi"))
@ -156,7 +156,7 @@ class ObjectHashAggregateSuite
)
checkAnswer(
df.groupBy($"id" % 4 as 'mod).agg(aggFunctions.head, aggFunctions.tail: _*),
df.groupBy($"id" % 4 as "mod").agg(aggFunctions.head, aggFunctions.tail: _*),
data.groupBy(_.getInt(0) % 4).map { case (key, value) =>
key -> Row.fromSeq(value.map(_.toSeq).transpose.map(_.count(_ != null): Long))
}.toSeq.map {
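The `as "mod"` above names the grouping expression exactly as `as 'mod` did; the alias becomes the output column. Quick usage sketch:

```scala
val grouped = df.groupBy($"id" % 4 as "mod").count()
grouped.columns  // Array("mod", "count")
```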
@ -65,7 +65,8 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te
options = Map.empty)(sparkSession = spark)
val logicalRelation = LogicalRelation(relation, tableMeta)
val query = Project(Seq(Symbol("i"), Symbol("p")),
Filter(Symbol("p") === 1, logicalRelation)).analyze
val optimized = Optimize.execute(query)
assert(optimized.missingInput.isEmpty)
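This hunk is the one place in the section where `Symbol("...")` is kept instead of `$"..."`: the names feed Catalyst's logical-plan DSL (`Project`/`Filter` over raw `Expression`s), whose implicit conversions are defined on `Symbol`, whereas the `$` interpolator from `testImplicits` yields a `Column`. A hedged sketch of that DSL (imports assumed to be in scope in such suites):

```scala
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._

// Symbols become UnresolvedAttributes usable inside raw logical plans
val plan = table("t").where(Symbol("p") === 1).select(Symbol("i"))
```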
@ -2375,7 +2375,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
})
spark
.range(5)
.select(badUDF('id).as("a"))
.select(badUDF($"id").as("a"))
.createOrReplaceTempView("test")
val scriptFilePath = getTestResourcePath("data")
val e = intercept[SparkException] {
@ -124,154 +124,154 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton {
test("filter pushdown - integer") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - long") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - float") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - double") {
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - string") {
withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === "1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < "2", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= "4", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("1") === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal("1") <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal("2") > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal("3") < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("1") >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal("4") <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
test("filter pushdown - boolean") {
withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === true, PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < true, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= false, PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(false) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(false) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(false) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(true) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(true) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(true) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
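Booleans push down like any other ordered type because `false < true`, which is why `$"_1" > false` above maps to `LESS_THAN_EQUALS` (it is rewritten as `NOT(_1 <= false)`). Reusing the implicit `df`:

```scala
df.filter($"_1" > false)  // pushed as (not (LESS_THAN_EQUALS _1 false))
```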
test("filter pushdown - decimal") {
withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(2)) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(
Literal(BigDecimal.valueOf(3)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(1)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(
Literal(BigDecimal.valueOf(4)) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
@ -282,22 +282,23 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton {
new Timestamp(milliseconds)
}
withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df =>
checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)
checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS)
checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS)
checkFilterPredicate(Literal(timestamps(0)) <=> $"_1",
PredicateLeaf.Operator.NULL_SAFE_EQUALS)
checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN)
checkFilterPredicate(Literal(timestamps(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(timestamps(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
}
}
@ -309,30 +310,30 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton {
// to produce string expression and then compare it to given string expression below.
// This might have to be changed after Hive version is upgraded.
checkFilterPredicateWithDiffHiveVersion(
$"_1".isNotNull,
"""leaf-0 = (IS_NULL _1)
|expr = (not leaf-0)""".stripMargin.trim
)
checkFilterPredicateWithDiffHiveVersion(
$"_1" =!= 1,
"""leaf-0 = (IS_NULL _1)
|leaf-1 = (EQUALS _1 1)
|expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim
)
checkFilterPredicateWithDiffHiveVersion(
!($"_1" < 4),
"""leaf-0 = (IS_NULL _1)
|leaf-1 = (LESS_THAN _1 4)
|expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim
)
checkFilterPredicateWithDiffHiveVersion(
$"_1" < 2 || $"_1" > 3,
"""leaf-0 = (LESS_THAN _1 2)
|leaf-1 = (LESS_THAN_EQUALS _1 3)
|expr = (or leaf-0 (not leaf-1))""".stripMargin.trim
)
checkFilterPredicateWithDiffHiveVersion(
$"_1" < 2 && $"_1" > 3,
"""leaf-0 = (IS_NULL _1)
|leaf-1 = (LESS_THAN _1 2)
|leaf-2 = (LESS_THAN_EQUALS _1 3)
@ -347,22 +348,22 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton {
}
// ArrayType
withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df =>
checkNoFilterPredicate($"_1".isNull)
}
// BinaryType
withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
checkNoFilterPredicate($"_1" <=> 1.b)
}
// DateType
if (!HiveUtils.isHive23) {
val stringDate = "2015-01-01"
withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df =>
checkNoFilterPredicate($"_1" === Date.valueOf(stringDate))
}
}
// MapType
withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df =>
checkNoFilterPredicate($"_1".isNotNull)
}
}
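For reference, the `1.b` and `i.b` spellings above come from a small implicit helper in the shared ORC test harness that turns an `Int` into its UTF-8 bytes; a sketch of the assumed helper (name hypothetical):

```scala
import java.nio.charset.StandardCharsets

implicit class IntToBinary(val i: Int) extends AnyVal {
  def b: Array[Byte] = i.toString.getBytes(StandardCharsets.UTF_8)
}
```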