[SPARK-8348][SQL] Add in operator to DataFrame Column
I have added it only for Scala. TODO: we should also support the `in` operator in Python. Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> Closes #6824 from yu-iskw/SPARK-8348 and squashes the following commits: e76d02f [Yu ISHIKAWA] Not use infix notation 6f744ac [Yu ISHIKAWA] Fit the test cases because these used the old test data set. 00077d3 [Yu ISHIKAWA] [SPARK-8348][SQL] Add in operator to DataFrame Column
This commit is contained in:
parent
a71cbbdea5
commit
754929b153
|
@ -621,7 +621,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
|
||||||
* @since 1.3.0
|
* @since 1.3.0
|
||||||
*/
|
*/
|
||||||
@scala.annotation.varargs
|
@scala.annotation.varargs
|
||||||
def in(list: Column*): Column = In(expr, list.map(_.expr))
|
def in(list: Any*): Column = In(expr, list.map(lit(_).expr))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SQL like expression.
|
* SQL like expression.
|
||||||
|
|
|
@ -296,6 +296,22 @@ class ColumnExpressionSuite extends QueryTest {
|
||||||
checkAnswer(testData.filter($"a".between($"b", $"c")), expectAnswer)
|
checkAnswer(testData.filter($"a".between($"b", $"c")), expectAnswer)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("in") {
|
||||||
|
val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
|
||||||
|
checkAnswer(df.filter($"a".in(1, 2)),
|
||||||
|
df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
|
||||||
|
checkAnswer(df.filter($"a".in(3, 2)),
|
||||||
|
df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
|
||||||
|
checkAnswer(df.filter($"a".in(3, 1)),
|
||||||
|
df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
|
||||||
|
checkAnswer(df.filter($"b".in("y", "x")),
|
||||||
|
df.collect().toSeq.filter(r => r.getString(1) == "y" || r.getString(1) == "x"))
|
||||||
|
checkAnswer(df.filter($"b".in("z", "x")),
|
||||||
|
df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "x"))
|
||||||
|
checkAnswer(df.filter($"b".in("z", "y")),
|
||||||
|
df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "y"))
|
||||||
|
}
|
||||||
|
|
||||||
val booleanData = ctx.createDataFrame(ctx.sparkContext.parallelize(
|
val booleanData = ctx.createDataFrame(ctx.sparkContext.parallelize(
|
||||||
Row(false, false) ::
|
Row(false, false) ::
|
||||||
Row(false, true) ::
|
Row(false, true) ::
|
||||||
|
|
Loading…
Reference in a new issue