[SPARK-21914][SQL][TESTS] Check results of expression examples
### What changes were proposed in this pull request?

A new test compares the outputs of expression examples in comments with the results of `hiveResultString()`. Also, existing examples where the actual and expected outputs differ have been fixed.

### Why are the changes needed?

This prevents mistakes in expression examples, and fixes existing mistakes in comments.

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

Added a new test to `SQLQuerySuite`.

Closes #25942 from MaxGekk/run-expr-examples.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
bd28e8e179
commit
4dd0066d40
|
@ -63,7 +63,7 @@ import org.apache.spark.sql.types._
|
|||
> SELECT _FUNC_(10.0, array(0.5, 0.4, 0.1), 100);
|
||||
[10.0,10.0,10.0]
|
||||
> SELECT _FUNC_(10.0, 0.5, 100);
|
||||
10.0
|
||||
10
|
||||
""",
|
||||
since = "2.1.0")
|
||||
case class ApproximatePercentile(
|
||||
|
|
|
@ -223,7 +223,7 @@ case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) {
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
|
||||
1.1135657469022013
|
||||
1.1135657469022011
|
||||
> SELECT _FUNC_(col) FROM VALUES (-1000), (-100), (10), (20) AS tab(col);
|
||||
-1.1135657469022011
|
||||
""",
|
||||
|
@ -245,9 +245,9 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) {
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
|
||||
-0.7014368047529618
|
||||
-0.7014368047529627
|
||||
> SELECT _FUNC_(col) FROM VALUES (1), (10), (100), (10), (1) as tab(col);
|
||||
0.19432323191698986
|
||||
0.19432323191699075
|
||||
""",
|
||||
since = "1.6.0")
|
||||
case class Kurtosis(child: Expression) extends CentralMomentAgg(child) {
|
||||
|
|
|
@ -448,7 +448,7 @@ case class IntegralDivide(left: Expression, right: Expression) extends DivModLik
|
|||
usage = "expr1 _FUNC_ expr2 - Returns the remainder after `expr1`/`expr2`.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT 2 _FUNC_ 1.8;
|
||||
> SELECT 2 % 1.8;
|
||||
0.2
|
||||
> SELECT MOD(2, 1.8);
|
||||
0.2
|
||||
|
|
|
@ -422,9 +422,9 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends CreateName
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_('a:1,b:2,c:3', ',', ':');
|
||||
map("a":"1","b":"2","c":"3")
|
||||
{"a":"1","b":"2","c":"3"}
|
||||
> SELECT _FUNC_('a');
|
||||
map("a":null)
|
||||
{"a":null}
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression)
|
||||
|
|
|
@ -40,8 +40,8 @@ import org.apache.spark.unsafe.types.UTF8String
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_('1, 0.8', 'a INT, b DOUBLE');
|
||||
{"a":1, "b":0.8}
|
||||
> SELECT _FUNC_('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'))
|
||||
{"a":1,"b":0.8}
|
||||
> SELECT _FUNC_('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));
|
||||
{"time":2015-08-26 00:00:00.0}
|
||||
""",
|
||||
since = "3.0.0")
|
||||
|
@ -199,7 +199,7 @@ case class SchemaOfCsv(
|
|||
> SELECT _FUNC_(named_struct('a', 1, 'b', 2));
|
||||
1,2
|
||||
> SELECT _FUNC_(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy'));
|
||||
"26/08/2015"
|
||||
26/08/2015
|
||||
""",
|
||||
since = "3.0.0")
|
||||
// scalastyle:on line.size.limit
|
||||
|
|
|
@ -631,7 +631,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_('2016-04-08', 'yyyy-MM-dd');
|
||||
1460041200
|
||||
1460098800
|
||||
""",
|
||||
since = "1.6.0")
|
||||
case class ToUnixTimestamp(
|
||||
|
@ -842,7 +842,7 @@ abstract class UnixTime extends ToTimestamp {
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss');
|
||||
1970-01-01 00:00:00
|
||||
1969-12-31 16:00:00
|
||||
""",
|
||||
since = "1.5.0")
|
||||
case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[String] = None)
|
||||
|
@ -1766,10 +1766,10 @@ case class MakeDate(year: Expression, month: Expression, day: Expression)
|
|||
> SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887);
|
||||
2014-12-28 06:30:45.887
|
||||
> SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887, 'CET');
|
||||
2014-12-28 10:30:45.887
|
||||
> SELECT _FUNC_(2019, 6, 30, 23, 59, 60)
|
||||
2014-12-27 21:30:45.887
|
||||
> SELECT _FUNC_(2019, 6, 30, 23, 59, 60);
|
||||
2019-07-01 00:00:00
|
||||
> SELECT _FUNC_(2019, 13, 1, 10, 11, 12, 13);
|
||||
> SELECT _FUNC_(2019, 13, 1, 10, 11, 12, 'PST');
|
||||
NULL
|
||||
> SELECT _FUNC_(null, 7, 22, 15, 30, 0);
|
||||
NULL
|
||||
|
|
|
@ -127,16 +127,16 @@ case class UserDefinedGenerator(
|
|||
* 3 NULL
|
||||
* }}}
|
||||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
// scalastyle:off line.size.limit line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows. Uses column names col0, col1, etc. by default unless specified otherwise.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(2, 1, 2, 3);
|
||||
1 2
|
||||
3 NULL
|
||||
1 2
|
||||
3 NULL
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
// scalastyle:on line.size.limit line.contains.tab
|
||||
case class Stack(children: Seq[Expression]) extends Generator {
|
||||
|
||||
private lazy val numRows = children.head.eval().asInstanceOf[Int]
|
||||
|
@ -375,16 +375,16 @@ case class Explode(child: Expression) extends ExplodeBase {
|
|||
* 1 20
|
||||
* }}}
|
||||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
// scalastyle:off line.size.limit line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows with positions, or the elements of map `expr` into multiple rows and columns with positions. Unless specified otherwise, uses the column name `pos` for position, `col` for elements of the array or `key` and `value` for elements of the map.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(array(10,20));
|
||||
0 10
|
||||
1 20
|
||||
0 10
|
||||
1 20
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
// scalastyle:on line.size.limit line.contains.tab
|
||||
case class PosExplode(child: Expression) extends ExplodeBase {
|
||||
override val position = true
|
||||
}
|
||||
|
@ -392,16 +392,16 @@ case class PosExplode(child: Expression) extends ExplodeBase {
|
|||
/**
|
||||
* Explodes an array of structs into a table.
|
||||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
// scalastyle:off line.size.limit line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(expr) - Explodes an array of structs into a table. Uses column names col1, col2, etc. by default unless specified otherwise.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(array(struct(1, 'a'), struct(2, 'b')));
|
||||
1 a
|
||||
2 b
|
||||
1 a
|
||||
2 b
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
// scalastyle:on line.size.limit line.contains.tab
|
||||
case class Inline(child: Expression) extends UnaryExpression with CollectionGenerator {
|
||||
override val inline: Boolean = true
|
||||
override val position: Boolean = false
|
||||
|
|
|
@ -38,7 +38,7 @@ trait GroupingSet extends Expression with CodegenFallback {
|
|||
override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
|
||||
}
|
||||
|
||||
// scalastyle:off line.size.limit
|
||||
// scalastyle:off line.size.limit line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = """
|
||||
_FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the specified columns
|
||||
|
@ -47,19 +47,19 @@ trait GroupingSet extends Expression with CodegenFallback {
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
|
||||
NULL 2 1
|
||||
NULL NULL 2
|
||||
Alice 2 1
|
||||
Bob 5 1
|
||||
NULL 5 1
|
||||
Bob NULL 1
|
||||
Alice NULL 1
|
||||
Bob 5 1
|
||||
Alice 2 1
|
||||
NULL NULL 2
|
||||
NULL 5 1
|
||||
Bob NULL 1
|
||||
Alice NULL 1
|
||||
NULL 2 1
|
||||
""",
|
||||
since = "2.0.0")
|
||||
// scalastyle:on line.size.limit
|
||||
// scalastyle:on line.size.limit line.contains.tab
|
||||
case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
|
||||
|
||||
// scalastyle:off line.size.limit
|
||||
// scalastyle:off line.size.limit line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = """
|
||||
_FUNC_([col1[, col2 ..]]) - create a multi-dimensional rollup using the specified columns
|
||||
|
@ -68,21 +68,21 @@ case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
|
||||
NULL NULL 2
|
||||
Alice 2 1
|
||||
Bob 5 1
|
||||
Bob NULL 1
|
||||
Alice NULL 1
|
||||
Bob 5 1
|
||||
Alice 2 1
|
||||
NULL NULL 2
|
||||
Bob NULL 1
|
||||
Alice NULL 1
|
||||
""",
|
||||
since = "2.0.0")
|
||||
// scalastyle:on line.size.limit
|
||||
// scalastyle:on line.size.limit line.contains.tab
|
||||
case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
|
||||
|
||||
/**
|
||||
* Indicates whether a specified column expression in a GROUP BY list is aggregated or not.
|
||||
* GROUPING returns 1 for aggregated or 0 for not aggregated in the result set.
|
||||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
// scalastyle:off line.size.limit line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = """
|
||||
_FUNC_(col) - indicates whether a specified column in a GROUP BY is aggregated or
|
||||
|
@ -91,12 +91,12 @@ case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT name, _FUNC_(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);
|
||||
Alice 0 2
|
||||
NULL 1 7
|
||||
Bob 0 5
|
||||
Bob 0 5
|
||||
Alice 0 2
|
||||
NULL 1 7
|
||||
""",
|
||||
since = "2.0.0")
|
||||
// scalastyle:on line.size.limit
|
||||
// scalastyle:on line.size.limit line.contains.tab
|
||||
case class Grouping(child: Expression) extends Expression with Unevaluable {
|
||||
@transient
|
||||
override lazy val references: AttributeSet =
|
||||
|
@ -111,7 +111,7 @@ case class Grouping(child: Expression) extends Expression with Unevaluable {
|
|||
*
|
||||
* If groupByExprs is empty, it means all grouping expressions in GroupingSets.
|
||||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
// scalastyle:off line.size.limit line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = """
|
||||
_FUNC_([col1[, col2 ..]]) - returns the level of grouping, equals to
|
||||
|
@ -120,20 +120,20 @@ case class Grouping(child: Expression) extends Expression with Unevaluable {
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT name, _FUNC_(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);
|
||||
NULL 2 2 165.0
|
||||
Alice 0 2 165.0
|
||||
NULL 2 5 180.0
|
||||
NULL 3 7 172.5
|
||||
Bob 0 5 180.0
|
||||
Bob 1 5 180.0
|
||||
Alice 1 2 165.0
|
||||
NULL 2 5 180.0
|
||||
Alice 0 2 165.0
|
||||
NULL 3 7 172.5
|
||||
NULL 2 2 165.0
|
||||
Bob 1 5 180.0
|
||||
Alice 1 2 165.0
|
||||
Bob 0 5 180.0
|
||||
""",
|
||||
note = """
|
||||
Input columns should match with grouping columns exactly, or empty (means all the grouping
|
||||
columns).
|
||||
""",
|
||||
since = "2.0.0")
|
||||
// scalastyle:on line.size.limit
|
||||
// scalastyle:on line.size.limit line.contains.tab
|
||||
case class GroupingID(groupByExprs: Seq[Expression]) extends Expression with Unevaluable {
|
||||
@transient
|
||||
override lazy val references: AttributeSet =
|
||||
|
|
|
@ -463,7 +463,7 @@ case class ArrayExists(
|
|||
> SELECT _FUNC_(array(1, null, 3), x -> x % 2 == 0);
|
||||
false
|
||||
> SELECT _FUNC_(array(2, null, 8), x -> x % 2 == 0);
|
||||
null
|
||||
NULL
|
||||
""",
|
||||
since = "3.0.0")
|
||||
case class ArrayForAll(
|
||||
|
|
|
@ -331,15 +331,15 @@ case class GetJsonObject(json: Expression, path: Expression)
|
|||
}
|
||||
}
|
||||
|
||||
// scalastyle:off line.size.limit
|
||||
// scalastyle:off line.size.limit line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(jsonStr, p1, p2, ..., pn) - Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_('{"a":1, "b":2}', 'a', 'b');
|
||||
1 2
|
||||
1 2
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
// scalastyle:on line.size.limit line.contains.tab
|
||||
case class JsonTuple(children: Seq[Expression])
|
||||
extends Generator with CodegenFallback {
|
||||
|
||||
|
@ -502,9 +502,9 @@ case class JsonTuple(children: Seq[Expression])
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_('{"a":1, "b":0.8}', 'a INT, b DOUBLE');
|
||||
{"a":1, "b":0.8}
|
||||
{"a":1,"b":0.8}
|
||||
> SELECT _FUNC_('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));
|
||||
{"time":"2015-08-26 00:00:00.0"}
|
||||
{"time":2015-08-26 00:00:00.0}
|
||||
""",
|
||||
since = "2.2.0")
|
||||
// scalastyle:on line.size.limit
|
||||
|
|
|
@ -1291,7 +1291,7 @@ abstract class RoundBase(child: Expression, scale: Expression,
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(2.5, 0);
|
||||
3.0
|
||||
3
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
case class Round(child: Expression, scale: Expression)
|
||||
|
@ -1311,7 +1311,7 @@ case class Round(child: Expression, scale: Expression)
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(2.5, 0);
|
||||
2.0
|
||||
2
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
case class BRound(child: Expression, scale: Expression)
|
||||
|
|
|
@ -96,7 +96,7 @@ abstract class StringRegexExpression extends BinaryExpression
|
|||
""",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%'
|
||||
> SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\Users%';
|
||||
true
|
||||
""",
|
||||
note = """
|
||||
|
@ -153,6 +153,7 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi
|
|||
}
|
||||
}
|
||||
|
||||
// scalastyle:off line.contains.tab
|
||||
@ExpressionDescription(
|
||||
usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.",
|
||||
arguments = """
|
||||
|
@ -170,18 +171,20 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi
|
|||
""",
|
||||
examples = """
|
||||
Examples:
|
||||
When spark.sql.parser.escapedStringLiterals is disabled (default).
|
||||
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*'
|
||||
> SET spark.sql.parser.escapedStringLiterals=true;
|
||||
spark.sql.parser.escapedStringLiterals true
|
||||
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*';
|
||||
true
|
||||
|
||||
When spark.sql.parser.escapedStringLiterals is enabled.
|
||||
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\Users.*'
|
||||
> SET spark.sql.parser.escapedStringLiterals=false;
|
||||
spark.sql.parser.escapedStringLiterals false
|
||||
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\Users.*';
|
||||
true
|
||||
""",
|
||||
note = """
|
||||
Use LIKE to match with simple string pattern.
|
||||
""",
|
||||
since = "1.0.0")
|
||||
// scalastyle:on line.contains.tab
|
||||
case class RLike(left: Expression, right: Expression) extends StringRegexExpression {
|
||||
|
||||
override def escape(v: String): String = v
|
||||
|
|
|
@ -847,7 +847,7 @@ object StringTrimLeft {
|
|||
usage = """
|
||||
_FUNC_(str) - Removes the leading space characters from `str`.
|
||||
|
||||
_FUNC_(trimStr, str) - Removes the leading string contains the characters from the trim string
|
||||
_FUNC_(str, trimStr) - Removes the leading string contains the characters from the trim string
|
||||
""",
|
||||
arguments = """
|
||||
Arguments:
|
||||
|
@ -858,7 +858,7 @@ object StringTrimLeft {
|
|||
Examples:
|
||||
> SELECT _FUNC_(' SparkSQL ');
|
||||
SparkSQL
|
||||
> SELECT _FUNC_('Sp', 'SSparkSQLS');
|
||||
> SELECT _FUNC_('SparkSQLS', 'Sp');
|
||||
arkSQLS
|
||||
""",
|
||||
since = "1.5.0")
|
||||
|
@ -949,7 +949,7 @@ object StringTrimRight {
|
|||
usage = """
|
||||
_FUNC_(str) - Removes the trailing space characters from `str`.
|
||||
|
||||
_FUNC_(trimStr, str) - Removes the trailing string which contains the characters from the trim string from the `str`
|
||||
_FUNC_(str, trimStr) - Removes the trailing string which contains the characters from the trim string from the `str`
|
||||
""",
|
||||
arguments = """
|
||||
Arguments:
|
||||
|
@ -960,7 +960,7 @@ object StringTrimRight {
|
|||
Examples:
|
||||
> SELECT _FUNC_(' SparkSQL ');
|
||||
SparkSQL
|
||||
> SELECT _FUNC_('LQSa', 'SSparkSQLS');
|
||||
> SELECT _FUNC_('SSparkSQLS', 'SQLS');
|
||||
SSpark
|
||||
""",
|
||||
since = "1.5.0")
|
||||
|
@ -1299,11 +1299,11 @@ object ParseUrl {
|
|||
usage = "_FUNC_(url, partToExtract[, key]) - Extracts a part from a URL.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_('http://spark.apache.org/path?query=1', 'HOST')
|
||||
> SELECT _FUNC_('http://spark.apache.org/path?query=1', 'HOST');
|
||||
spark.apache.org
|
||||
> SELECT _FUNC_('http://spark.apache.org/path?query=1', 'QUERY')
|
||||
> SELECT _FUNC_('http://spark.apache.org/path?query=1', 'QUERY');
|
||||
query=1
|
||||
> SELECT _FUNC_('http://spark.apache.org/path?query=1', 'QUERY', 'query')
|
||||
> SELECT _FUNC_('http://spark.apache.org/path?query=1', 'QUERY', 'query');
|
||||
1
|
||||
""",
|
||||
since = "2.0.0")
|
||||
|
|
|
@ -194,7 +194,7 @@ case class XPathString(xml: Expression, path: Expression) extends XPathExtract {
|
|||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()');
|
||||
['b1','b2','b3']
|
||||
["b1","b2","b3"]
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
case class XPathList(xml: Expression, path: Expression) extends XPathExtract {
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.spark.{AccumulatorSuite, SparkException}
|
|||
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
|
||||
import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation
|
||||
import org.apache.spark.sql.catalyst.util.StringUtils
|
||||
import org.apache.spark.sql.execution.HiveResult.hiveResultString
|
||||
import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, SortAggregateExec}
|
||||
import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
|
||||
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
|
||||
|
@ -127,7 +128,9 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
|
|||
// _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)`
|
||||
"org.apache.spark.sql.catalyst.expressions.Remainder",
|
||||
// Examples demonstrate alternative names, see SPARK-20749
|
||||
"org.apache.spark.sql.catalyst.expressions.Length")
|
||||
"org.apache.spark.sql.catalyst.expressions.Length",
|
||||
// Uses settings without _FUNC_ in `SET spark.sql.parser.escapedStringLiterals=true`
|
||||
"org.apache.spark.sql.catalyst.expressions.RLike")
|
||||
spark.sessionState.functionRegistry.listFunction().foreach { funcId =>
|
||||
val info = spark.sessionState.catalog.lookupFunctionInfo(funcId)
|
||||
val className = info.getClassName
|
||||
|
@ -140,6 +143,53 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
|
|||
}
|
||||
}
|
||||
|
||||
test("check outputs of expression examples") {
|
||||
def unindentAndTrim(s: String): String = {
|
||||
s.replaceAll("\n\\s+", "\n").trim
|
||||
}
|
||||
val beginSqlStmtRe = " > ".r
|
||||
val endSqlStmtRe = ";\n".r
|
||||
def checkExampleSyntax(example: String): Unit = {
|
||||
val beginStmtNum = beginSqlStmtRe.findAllIn(example).length
|
||||
val endStmtNum = endSqlStmtRe.findAllIn(example).length
|
||||
assert(beginStmtNum === endStmtNum,
|
||||
"The number of ` > ` does not match to the number of `;`")
|
||||
}
|
||||
val exampleRe = """^(.+);\n(?s)(.+)$""".r
|
||||
val ignoreSet = Set(
|
||||
// One of examples shows getting the current timestamp
|
||||
"org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
|
||||
// Random output without a seed
|
||||
"org.apache.spark.sql.catalyst.expressions.Rand",
|
||||
"org.apache.spark.sql.catalyst.expressions.Randn",
|
||||
"org.apache.spark.sql.catalyst.expressions.Shuffle",
|
||||
"org.apache.spark.sql.catalyst.expressions.Uuid",
|
||||
// The example calls methods that return unstable results.
|
||||
"org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection")
|
||||
|
||||
withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
|
||||
spark.sessionState.functionRegistry.listFunction().par.foreach { funcId =>
|
||||
val info = spark.sessionState.catalog.lookupFunctionInfo(funcId)
|
||||
val className = info.getClassName
|
||||
if (!ignoreSet.contains(className)) {
|
||||
withClue(s"Function '${info.getName}', Expression class '$className'") {
|
||||
val example = info.getExamples
|
||||
checkExampleSyntax(example)
|
||||
example.split(" > ").toList.foreach(_ match {
|
||||
case exampleRe(sql, output) =>
|
||||
val df = spark.sql(sql)
|
||||
val actual = unindentAndTrim(
|
||||
hiveResultString(df.queryExecution.executedPlan).mkString("\n"))
|
||||
val expected = unindentAndTrim(output)
|
||||
assert(actual === expected)
|
||||
case _ =>
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-6743: no columns from cache") {
|
||||
Seq(
|
||||
(83, 0, 38),
|
||||
|
|
Loading…
Reference in a new issue