[SPARK-14415][SQL] All functions should show usages by command DESC FUNCTION

## What changes were proposed in this pull request?

Currently, many functions do not show usages, as in the following example.
```
scala> sql("desc function extended `sin`").collect().foreach(println)
[Function: sin]
[Class: org.apache.spark.sql.catalyst.expressions.Sin]
[Usage: To be added.]
[Extended Usage:
To be added.]
```

This PR adds descriptions for those functions and adds a test case to prevent adding new functions without usage.
```
scala>  sql("desc function extended `sin`").collect().foreach(println);
[Function: sin]
[Class: org.apache.spark.sql.catalyst.expressions.Sin]
[Usage: sin(x) - Returns the sine of x.]
[Extended Usage:
> SELECT sin(0);
 0.0]
```

The only exceptions are `cube`, `grouping`, `grouping_id`, `rollup`, `window`.
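
For reference, every change in the diff below follows the same `@ExpressionDescription(usage = ..., extended = ...)` pattern, where `_FUNC_` is a placeholder replaced with the registered function name when the usage is displayed. Below is a minimal sketch of a hypothetical expression annotated in this style (the `DoubleIt` class is illustrative only, not part of this PR, and assumes the catalyst module is on the classpath):
```scala
package org.apache.spark.sql.catalyst.expressions

// Hypothetical expression annotated in the same style as the changes below.
// The usage/extended strings are what `DESC FUNCTION [EXTENDED]` prints,
// with _FUNC_ replaced by the function's registered name.
@ExpressionDescription(
  usage = "_FUNC_(x) - Returns twice the value of x.",
  extended = "> SELECT _FUNC_(2);\n 4.0")
case class DoubleIt(child: Expression)
  extends UnaryMathExpression((x: Double) => x * 2, "DOUBLEIT")
```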

## How was this patch tested?

Pass the Jenkins tests (including the new test cases).
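
As a rough illustration (not the exact test added in this PR), a check that prevents registering functions without usage can walk the function list via the public SQL API and fail whenever `DESC FUNCTION EXTENDED` still prints the `To be added.` placeholder, skipping the exceptions listed above. The `UsageCheckSketch` object below is a hypothetical sketch, not the PR's test code:
```scala
import org.apache.spark.sql.SparkSession

object UsageCheckSketch {
  // Functions intentionally left without usage (see the exceptions above).
  private val exceptions = Set("cube", "grouping", "grouping_id", "rollup", "window")

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("usage-check").getOrCreate()
    // List every registered function, then inspect its description.
    val functions = spark.sql("SHOW FUNCTIONS").collect().map(_.getString(0))
    val missing = functions.filterNot(exceptions.contains).filter { name =>
      spark.sql(s"DESC FUNCTION EXTENDED `$name`")
        .collect()
        .exists(_.getString(0).contains("To be added."))
    }
    assert(missing.isEmpty, s"Functions without usage: ${missing.mkString(", ")}")
    spark.stop()
  }
}
```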

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #12185 from dongjoon-hyun/SPARK-14415.
Authored by Dongjoon Hyun on 2016-04-10 11:46:45 -07:00, committed by Yin Huai.
Commit a7ce473bd0 (parent b5c785629a); 28 changed files with 489 additions and 25 deletions.


@ -23,6 +23,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the mean calculated from values of a group.")
case class Average(child: Expression) extends DeclarativeAggregate {
override def prettyName: String = "avg"


@ -130,6 +130,10 @@ abstract class CentralMomentAgg(child: Expression) extends DeclarativeAggregate
}
// Compute the population standard deviation of a column
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the population standard deviation calculated from values of a group.")
// scalastyle:on line.size.limit
case class StddevPop(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 2
@ -143,6 +147,8 @@ case class StddevPop(child: Expression) extends CentralMomentAgg(child) {
}
// Compute the sample standard deviation of a column
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the sample standard deviation calculated from values of a group.")
case class StddevSamp(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 2
@ -157,6 +163,8 @@ case class StddevSamp(child: Expression) extends CentralMomentAgg(child) {
}
// Compute the population variance of a column
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the population variance calculated from values of a group.")
case class VariancePop(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 2
@ -170,6 +178,8 @@ case class VariancePop(child: Expression) extends CentralMomentAgg(child) {
}
// Compute the sample variance of a column
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the sample variance calculated from values of a group.")
case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 2
@ -183,6 +193,8 @@ case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) {
override def prettyName: String = "var_samp"
}
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the Skewness value calculated from values of a group.")
case class Skewness(child: Expression) extends CentralMomentAgg(child) {
override def prettyName: String = "skewness"
@ -196,6 +208,8 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) {
}
}
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the Kurtosis value calculated from values of a group.")
case class Kurtosis(child: Expression) extends CentralMomentAgg(child) {
override protected def momentOrder = 4


@ -28,6 +28,8 @@ import org.apache.spark.sql.types._
* Definition of Pearson correlation can be found at
* http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient
*/
@ExpressionDescription(
usage = "_FUNC_(x,y) - Returns Pearson coefficient of correlation between a set of number pairs.")
case class Corr(x: Expression, y: Expression) extends DeclarativeAggregate {
override def children: Seq[Expression] = Seq(x, y)


@ -21,6 +21,12 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """_FUNC_(*) - Returns the total number of retrieved rows, including rows containing NULL values.
_FUNC_(expr) - Returns the number of rows for which the supplied expression is non-NULL.
_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL.""")
// scalastyle:on line.size.limit
case class Count(children: Seq[Expression]) extends DeclarativeAggregate {
override def nullable: Boolean = false


@ -76,6 +76,8 @@ abstract class Covariance(x: Expression, y: Expression) extends DeclarativeAggre
}
}
@ExpressionDescription(
usage = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs.")
case class CovPopulation(left: Expression, right: Expression) extends Covariance(left, right) {
override val evaluateExpression: Expression = {
If(n === Literal(0.0), Literal.create(null, DoubleType),
@ -85,6 +87,8 @@ case class CovPopulation(left: Expression, right: Expression) extends Covariance
}
@ExpressionDescription(
usage = "_FUNC_(x,y) - Returns the sample covariance of a set of number pairs.")
case class CovSample(left: Expression, right: Expression) extends Covariance(left, right) {
override val evaluateExpression: Expression = {
If(n === Literal(0.0), Literal.create(null, DoubleType),


@ -28,6 +28,11 @@ import org.apache.spark.sql.types._
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
*/
@ExpressionDescription(
usage = """_FUNC_(expr) - Returns the first value of `child` for a group of rows.
_FUNC_(expr,isIgnoreNull=false) - Returns the first value of `child` for a group of rows.
If isIgnoreNull is true, returns only non-null values.
""")
case class First(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {
def this(child: Expression) = this(child, Literal.create(false, BooleanType))


@ -20,8 +20,6 @@ package org.apache.spark.sql.catalyst.expressions.aggregate
import java.lang.{Long => JLong}
import java.util
import com.clearspring.analytics.hash.MurmurHash
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
@ -48,6 +46,11 @@ import org.apache.spark.sql.types._
* @param relativeSD the maximum estimation error allowed.
*/
// scalastyle:on
@ExpressionDescription(
usage = """_FUNC_(expr) - Returns the estimated cardinality by HyperLogLog++.
_FUNC_(expr, relativeSD=0.05) - Returns the estimated cardinality by HyperLogLog++
with relativeSD, the maximum estimation error allowed.
""")
case class HyperLogLogPlusPlus(
child: Expression,
relativeSD: Double = 0.05,


@ -28,6 +28,8 @@ import org.apache.spark.sql.types._
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
*/
@ExpressionDescription(
usage = "_FUNC_(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows.")
case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {
def this(child: Expression) = this(child, Literal.create(false, BooleanType))


@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the maximum value of expr.")
case class Max(child: Expression) extends DeclarativeAggregate {
override def children: Seq[Expression] = child :: Nil


@ -22,7 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the minimum value of expr.")
case class Min(child: Expression) extends DeclarativeAggregate {
override def children: Seq[Expression] = child :: Nil


@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the sum calculated from values of a group.")
case class Sum(child: Expression) extends DeclarativeAggregate {
override def children: Seq[Expression] = child :: Nil


@ -23,7 +23,8 @@ import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
@ExpressionDescription(
usage = "_FUNC_(a) - Returns -a.")
case class UnaryMinus(child: Expression) extends UnaryExpression
with ExpectsInputTypes with NullIntolerant {
@ -59,6 +60,8 @@ case class UnaryMinus(child: Expression) extends UnaryExpression
override def sql: String = s"(-${child.sql})"
}
@ExpressionDescription(
usage = "_FUNC_(a) - Returns a.")
case class UnaryPositive(child: Expression)
extends UnaryExpression with ExpectsInputTypes with NullIntolerant {
override def prettyName: String = "positive"
@ -79,8 +82,8 @@ case class UnaryPositive(child: Expression)
* A function that get the absolute value of the numeric value.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the absolute value of the numeric value",
extended = "> SELECT _FUNC_('-1');\n1")
usage = "_FUNC_(expr) - Returns the absolute value of the numeric value.",
extended = "> SELECT _FUNC_('-1');\n 1")
case class Abs(child: Expression)
extends UnaryExpression with ExpectsInputTypes with NullIntolerant {
@ -126,6 +129,8 @@ private[sql] object BinaryArithmetic {
def unapply(e: BinaryArithmetic): Option[(Expression, Expression)] = Some((e.left, e.right))
}
@ExpressionDescription(
usage = "a _FUNC_ b - Returns a+b.")
case class Add(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant {
override def inputType: AbstractDataType = TypeCollection.NumericAndInterval
@ -155,6 +160,8 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic wit
}
}
@ExpressionDescription(
usage = "a _FUNC_ b - Returns a-b.")
case class Subtract(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {
@ -185,6 +192,8 @@ case class Subtract(left: Expression, right: Expression)
}
}
@ExpressionDescription(
usage = "a _FUNC_ b - Multiplies a by b.")
case class Multiply(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {
@ -198,6 +207,9 @@ case class Multiply(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = numeric.times(input1, input2)
}
@ExpressionDescription(
usage = "a _FUNC_ b - Divides a by b.",
extended = "> SELECT 3 _FUNC_ 2;\n 1.5")
case class Divide(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {
@ -275,6 +287,8 @@ case class Divide(left: Expression, right: Expression)
}
}
@ExpressionDescription(
usage = "a _FUNC_ b - Returns the remainder when dividing a by b.")
case class Remainder(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {
@ -464,6 +478,9 @@ case class MinOf(left: Expression, right: Expression)
override def symbol: String = "min"
}
@ExpressionDescription(
usage = "_FUNC_(a, b) - Returns the positive modulo",
extended = "> SELECT _FUNC_(10,3);\n 1")
case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant {
override def toString: String = s"pmod($left, $right)"


@ -26,6 +26,9 @@ import org.apache.spark.sql.types._
*
* Code generation inherited from BinaryArithmetic.
*/
@ExpressionDescription(
usage = "a _FUNC_ b - Bitwise AND.",
extended = "> SELECT 3 _FUNC_ 5; 1")
case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic {
override def inputType: AbstractDataType = IntegralType
@ -51,6 +54,9 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme
*
* Code generation inherited from BinaryArithmetic.
*/
@ExpressionDescription(
usage = "a _FUNC_ b - Bitwise OR.",
extended = "> SELECT 3 _FUNC_ 5; 7")
case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic {
override def inputType: AbstractDataType = IntegralType
@ -76,6 +82,9 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet
*
* Code generation inherited from BinaryArithmetic.
*/
@ExpressionDescription(
usage = "a _FUNC_ b - Bitwise exclusive OR.",
extended = "> SELECT 3 _FUNC_ 5; 2")
case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic {
override def inputType: AbstractDataType = IntegralType
@ -99,6 +108,9 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme
/**
* A function that calculates bitwise not(~) of a number.
*/
@ExpressionDescription(
usage = "_FUNC_ b - Bitwise NOT.",
extended = "> SELECT _FUNC_ 0; -1")
case class BitwiseNot(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType)


@ -26,6 +26,8 @@ import org.apache.spark.sql.types._
/**
* Given an array or map, returns its size.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the size of an array or a map.")
case class Size(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(ArrayType, MapType))
@ -44,6 +46,11 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType
* Sorts the input array in ascending / descending order according to the natural ordering of
* the array elements and returns it.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements.",
extended = " > SELECT _FUNC_(array('b', 'd', 'c', 'a'));\n 'a', 'b', 'c', 'd'")
// scalastyle:on line.size.limit
case class SortArray(base: Expression, ascendingOrder: Expression)
extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
@ -125,6 +132,9 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
/**
* Checks if the array (left) has the element (right)
*/
@ExpressionDescription(
usage = "_FUNC_(array, value) - Returns TRUE if the array contains value.",
extended = " > SELECT _FUNC_(array(1, 2, 3), 2);\n true")
case class ArrayContains(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {


@ -27,6 +27,8 @@ import org.apache.spark.unsafe.types.UTF8String
/**
* Returns an Array containing the evaluation of all children expressions.
*/
@ExpressionDescription(
usage = "_FUNC_(n0, ...) - Returns an array with the given elements.")
case class CreateArray(children: Seq[Expression]) extends Expression {
override def foldable: Boolean = children.forall(_.foldable)
@ -73,6 +75,8 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
* Returns a catalyst Map containing the evaluation of all children expressions as keys and values.
* The children are a flatted sequence of kv pairs, e.g. (key1, value1, key2, value2, ...)
*/
@ExpressionDescription(
usage = "_FUNC_(key0, value0, key1, value1...) - Creates a map with the given key/value pairs.")
case class CreateMap(children: Seq[Expression]) extends Expression {
private[sql] lazy val keys = children.indices.filter(_ % 2 == 0).map(children)
private[sql] lazy val values = children.indices.filter(_ % 2 != 0).map(children)
@ -153,6 +157,8 @@ case class CreateMap(children: Seq[Expression]) extends Expression {
/**
* Returns a Row containing the evaluation of all children expressions.
*/
@ExpressionDescription(
usage = "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values.")
case class CreateStruct(children: Seq[Expression]) extends Expression {
override def foldable: Boolean = children.forall(_.foldable)
@ -204,6 +210,10 @@ case class CreateStruct(children: Seq[Expression]) extends Expression {
*
* @param children Seq(name1, val1, name2, val2, ...)
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(name1, val1, name2, val2, ...) - Creates a struct with the given field names and values.")
// scalastyle:on line.size.limit
case class CreateNamedStruct(children: Seq[Expression]) extends Expression {
/**


@ -23,7 +23,10 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(expr1,expr2,expr3) - If expr1 is TRUE then IF() returns expr2; otherwise it returns expr3.")
// scalastyle:on line.size.limit
case class If(predicate: Expression, trueValue: Expression, falseValue: Expression)
extends Expression {
@ -85,6 +88,10 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi
* @param branches seq of (branch condition, branch value)
* @param elseValue optional value for the else branch
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END - When a = true, returns b; when c = true, return d; else return e.")
// scalastyle:on line.size.limit
case class CaseWhen(branches: Seq[(Expression, Expression)], elseValue: Option[Expression] = None)
extends Expression with CodegenFallback {
@ -256,6 +263,8 @@ object CaseKeyWhen {
* A function that returns the least value of all parameters, skipping null values.
* It takes at least 2 parameters, and returns null iff all parameters are null.
*/
@ExpressionDescription(
usage = "_FUNC_(n1, ...) - Returns the least value of all parameters, skipping null values.")
case class Least(children: Seq[Expression]) extends Expression {
override def nullable: Boolean = children.forall(_.nullable)
@ -315,6 +324,8 @@ case class Least(children: Seq[Expression]) extends Expression {
* A function that returns the greatest value of all parameters, skipping null values.
* It takes at least 2 parameters, and returns null iff all parameters are null.
*/
@ExpressionDescription(
usage = "_FUNC_(n1, ...) - Returns the greatest value of all parameters, skipping null values.")
case class Greatest(children: Seq[Expression]) extends Expression {
override def nullable: Boolean = children.forall(_.nullable)


@ -35,6 +35,8 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
*
* There is no code generation since this expression should get constant folded by the optimizer.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns the current date at the start of query evaluation.")
case class CurrentDate() extends LeafExpression with CodegenFallback {
override def foldable: Boolean = true
override def nullable: Boolean = false
@ -54,6 +56,8 @@ case class CurrentDate() extends LeafExpression with CodegenFallback {
*
* There is no code generation since this expression should get constant folded by the optimizer.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.")
case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
override def foldable: Boolean = true
override def nullable: Boolean = false
@ -70,6 +74,9 @@ case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
/**
* Adds a number of days to startdate.
*/
@ExpressionDescription(
usage = "_FUNC_(start_date, num_days) - Returns the date that is num_days after start_date.",
extended = "> SELECT _FUNC_('2016-07-30', 1);\n '2016-07-31'")
case class DateAdd(startDate: Expression, days: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -96,6 +103,9 @@ case class DateAdd(startDate: Expression, days: Expression)
/**
* Subtracts a number of days to startdate.
*/
@ExpressionDescription(
usage = "_FUNC_(start_date, num_days) - Returns the date that is num_days before start_date.",
extended = "> SELECT _FUNC_('2016-07-30', 1);\n '2016-07-29'")
case class DateSub(startDate: Expression, days: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
override def left: Expression = startDate
@ -118,6 +128,9 @@ case class DateSub(startDate: Expression, days: Expression)
override def prettyName: String = "date_sub"
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the hour component of the string/timestamp/interval.",
extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 12")
case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
@ -134,6 +147,9 @@ case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInpu
}
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the minute component of the string/timestamp/interval.",
extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 58")
case class Minute(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
@ -150,6 +166,9 @@ case class Minute(child: Expression) extends UnaryExpression with ImplicitCastIn
}
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the second component of the string/timestamp/interval.",
extended = "> SELECT _FUNC_('2009-07-30 12:58:59');\n 59")
case class Second(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
@ -166,6 +185,9 @@ case class Second(child: Expression) extends UnaryExpression with ImplicitCastIn
}
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the day of year of date/timestamp.",
extended = "> SELECT _FUNC_('2016-04-09');\n 100")
case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@ -182,7 +204,9 @@ case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCas
}
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the year component of the date/timestamp/interval.",
extended = "> SELECT _FUNC_('2016-07-30');\n 2016")
case class Year(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@ -199,6 +223,8 @@ case class Year(child: Expression) extends UnaryExpression with ImplicitCastInpu
}
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the quarter of the year for date, in the range 1 to 4.")
case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@ -215,6 +241,9 @@ case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastI
}
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the month component of the date/timestamp/interval",
extended = "> SELECT _FUNC_('2016-07-30');\n 7")
case class Month(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@ -231,6 +260,9 @@ case class Month(child: Expression) extends UnaryExpression with ImplicitCastInp
}
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the day of month of date/timestamp, or the day of interval.",
extended = "> SELECT _FUNC_('2009-07-30');\n 30")
case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@ -247,6 +279,9 @@ case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCa
}
}
@ExpressionDescription(
usage = "_FUNC_(param) - Returns the week of the year of the given date.",
extended = "> SELECT _FUNC_('2008-02-20');\n 8")
case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
@ -283,6 +318,11 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa
}
}
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(date/timestamp/string, fmt) - Converts a date/timestamp/string to a value of string in the format specified by the date format fmt.",
extended = "> SELECT _FUNC_('2016-04-08', 'y')\n '2016'")
// scalastyle:on line.size.limit
case class DateFormatClass(left: Expression, right: Expression) extends BinaryExpression
with ImplicitCastInputTypes {
@ -310,6 +350,8 @@ case class DateFormatClass(left: Expression, right: Expression) extends BinaryEx
* Converts time string with given pattern.
* Deterministic version of [[UnixTimestamp]], must have at least one parameter.
*/
@ExpressionDescription(
usage = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp of the give time.")
case class ToUnixTimestamp(timeExp: Expression, format: Expression) extends UnixTime {
override def left: Expression = timeExp
override def right: Expression = format
@ -331,6 +373,8 @@ case class ToUnixTimestamp(timeExp: Expression, format: Expression) extends Unix
* If the first parameter is a Date or Timestamp instead of String, we will ignore the
* second parameter.
*/
@ExpressionDescription(
usage = "_FUNC_([date[, pattern]]) - Returns the UNIX timestamp of current or specified time.")
case class UnixTimestamp(timeExp: Expression, format: Expression) extends UnixTime {
override def left: Expression = timeExp
override def right: Expression = format
@ -459,6 +503,9 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
* format. If the format is missing, using format like "1970-01-01 00:00:00".
* Note that hive Language Manual says it returns 0 if fail, but in fact it returns null.
*/
@ExpressionDescription(
usage = "_FUNC_(unix_time, format) - Returns unix_time in the specified format",
extended = "> SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss');\n '1970-01-01 00:00:00'")
case class FromUnixTime(sec: Expression, format: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -544,6 +591,9 @@ case class FromUnixTime(sec: Expression, format: Expression)
/**
* Returns the last day of the month which the date belongs to.
*/
@ExpressionDescription(
usage = "_FUNC_(date) - Returns the last day of the month which the date belongs to.",
extended = "> SELECT _FUNC_('2009-01-12');\n '2009-01-31'")
case class LastDay(startDate: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def child: Expression = startDate
@ -570,6 +620,11 @@ case class LastDay(startDate: Expression) extends UnaryExpression with ImplicitC
*
* Allowed "dayOfWeek" is defined in [[DateTimeUtils.getDayOfWeekFromString]].
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(start_date, day_of_week) - Returns the first date which is later than start_date and named as indicated.",
extended = "> SELECT _FUNC_('2015-01-14', 'TU');\n '2015-01-20'")
// scalastyle:on line.size.limit
case class NextDay(startDate: Expression, dayOfWeek: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -654,6 +709,10 @@ case class TimeAdd(start: Expression, interval: Expression)
/**
* Assumes given timestamp is UTC and converts to given timezone.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(timestamp, string timezone) - Assumes given timestamp is UTC and converts to given timezone.")
// scalastyle:on line.size.limit
case class FromUTCTimestamp(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -729,6 +788,9 @@ case class TimeSub(start: Expression, interval: Expression)
/**
* Returns the date that is num_months after start_date.
*/
@ExpressionDescription(
usage = "_FUNC_(start_date, num_months) - Returns the date that is num_months after start_date.",
extended = "> SELECT _FUNC_('2016-08-31', 1);\n '2016-09-30'")
case class AddMonths(startDate: Expression, numMonths: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -756,6 +818,9 @@ case class AddMonths(startDate: Expression, numMonths: Expression)
/**
* Returns number of months between dates date1 and date2.
*/
@ExpressionDescription(
usage = "_FUNC_(date1, date2) - returns number of months between dates date1 and date2.",
extended = "> SELECT _FUNC_('1997-02-28 10:30:00', '1996-10-30');\n 3.94959677")
case class MonthsBetween(date1: Expression, date2: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -783,6 +848,10 @@ case class MonthsBetween(date1: Expression, date2: Expression)
/**
* Assumes given timestamp is in given timezone and converts to UTC.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(timestamp, string timezone) - Assumes given timestamp is in given timezone and converts to UTC.")
// scalastyle:on line.size.limit
case class ToUTCTimestamp(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -830,6 +899,9 @@ case class ToUTCTimestamp(left: Expression, right: Expression)
/**
* Returns the date part of a timestamp or string.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Extracts the date part of the date or datetime expression expr.",
extended = "> SELECT _FUNC_('2009-07-30 04:17:52');\n '2009-07-30'")
case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
// Implicit casting of spark will accept string in both date and timestamp format, as
@ -850,6 +922,11 @@ case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastIn
/**
* Returns date truncated to the unit specified by the format.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(date, fmt) - Returns returns date with the time portion of the day truncated to the unit specified by the format model fmt.",
extended = "> SELECT _FUNC_('2009-02-12', 'MM')\n '2009-02-01'\n> SELECT _FUNC_('2015-10-27', 'YEAR');\n '2015-01-01'")
// scalastyle:on line.size.limit
case class TruncDate(date: Expression, format: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
override def left: Expression = date
@ -921,6 +998,9 @@ case class TruncDate(date: Expression, format: Expression)
/**
* Returns the number of days from startDate to endDate.
*/
@ExpressionDescription(
usage = "_FUNC_(date1, date2) - Returns the number of days between date1 and date2.",
extended = "> SELECT _FUNC_('2009-07-30', '2009-07-31');\n 1")
case class DateDiff(endDate: Expression, startDate: Expression)
extends BinaryExpression with ImplicitCastInputTypes {


@ -99,6 +99,10 @@ case class UserDefinedGenerator(
/**
* Given an input array produces a sequence of rows for each value in the array.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(a) - Separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns.")
// scalastyle:on line.size.limit
case class Explode(child: Expression) extends UnaryExpression with Generator with CodegenFallback {
override def children: Seq[Expression] = child :: Nil


@ -106,6 +106,8 @@ private[this] object SharedFactory {
* Extracts json object from a json string based on json path specified, and returns json string
* of the extracted json object. It will return null if the input json string is invalid.
*/
@ExpressionDescription(
usage = "_FUNC_(json_txt, path) - Extract a json object from path")
case class GetJsonObject(json: Expression, path: Expression)
extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
@ -319,6 +321,10 @@ case class GetJsonObject(json: Expression, path: Expression)
}
}
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. All the input parameters and output column types are string.")
// scalastyle:on line.size.limit
case class JsonTuple(children: Seq[Expression])
extends Generator with CodegenFallback {


@ -50,6 +50,7 @@ abstract class LeafMathExpression(c: Double, name: String)
/**
* A unary expression specifically for math functions. Math Functions expect a specific type of
* input format, therefore these functions extend `ExpectsInputTypes`.
*
* @param f The math function.
* @param name The short name of the function
*/
@ -103,6 +104,7 @@ abstract class UnaryLogExpression(f: Double => Double, name: String)
/**
* A binary expression specifically for math functions that take two `Double`s as input and returns
* a `Double`.
*
* @param f The math function.
* @param name The short name of the function
*/
@ -136,12 +138,18 @@ abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String)
* Euler's number. Note that there is no code generation because this is only
* evaluated by the optimizer during constant folding.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns Euler's number, E.",
extended = "> SELECT _FUNC_();\n 2.718281828459045")
case class EulerNumber() extends LeafMathExpression(math.E, "E")
/**
* Pi. Note that there is no code generation because this is only
* evaluated by the optimizer during constant folding.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns PI.",
extended = "> SELECT _FUNC_();\n 3.141592653589793")
case class Pi() extends LeafMathExpression(math.Pi, "PI")
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -150,14 +158,29 @@ case class Pi() extends LeafMathExpression(math.Pi, "PI")
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the arc cosine of x if -1<=x<=1 or NaN otherwise.",
extended = "> SELECT _FUNC_(1);\n 0.0\n> SELECT _FUNC_(2);\n NaN")
case class Acos(child: Expression) extends UnaryMathExpression(math.acos, "ACOS")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the arc sin of x if -1<=x<=1 or NaN otherwise.",
extended = "> SELECT _FUNC_(0);\n 0.0\n> SELECT _FUNC_(2);\n NaN")
case class Asin(child: Expression) extends UnaryMathExpression(math.asin, "ASIN")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the arc tangent.",
extended = "> SELECT _FUNC_(0);\n 0.0")
case class Atan(child: Expression) extends UnaryMathExpression(math.atan, "ATAN")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the cube root of a double value.",
extended = "> SELECT _FUNC_(27.0);\n 3.0")
case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the smallest integer not smaller than x.",
extended = "> SELECT _FUNC_(-0.1);\n 0\n> SELECT _FUNC_(5);\n 5")
case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL") {
override def dataType: DataType = child.dataType match {
case dt @ DecimalType.Fixed(_, 0) => dt
@ -184,16 +207,26 @@ case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL"
}
}
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the cosine of x.",
extended = "> SELECT _FUNC_(0);\n 1.0")
case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the hyperbolic cosine of x.",
extended = "> SELECT _FUNC_(0);\n 1.0")
case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, "COSH")
/**
* Convert a num from one base to another
*
* @param numExpr the number to be converted
* @param fromBaseExpr from which base
* @param toBaseExpr to which base
*/
@ExpressionDescription(
usage = "_FUNC_(num, from_base, to_base) - Convert num from from_base to to_base.",
extended = "> SELECT _FUNC_('100', 2, 10);\n '4'\n> SELECT _FUNC_(-10, 16, -10);\n '16'")
case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@ -222,10 +255,19 @@ case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expre
}
}
@ExpressionDescription(
usage = "_FUNC_(x) - Returns e to the power of x.",
extended = "> SELECT _FUNC_(0);\n 1.0")
case class Exp(child: Expression) extends UnaryMathExpression(math.exp, "EXP")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns exp(x) - 1.",
extended = "> SELECT _FUNC_(0);\n 0.0")
case class Expm1(child: Expression) extends UnaryMathExpression(math.expm1, "EXPM1")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the largest integer not greater than x.",
extended = "> SELECT _FUNC_(-0.1);\n -1\n> SELECT _FUNC_(5);\n 5")
case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLOOR") {
override def dataType: DataType = child.dataType match {
case dt @ DecimalType.Fixed(_, 0) => dt
@ -283,6 +325,9 @@ object Factorial {
)
}
@ExpressionDescription(
usage = "_FUNC_(n) - Returns n factorial for n is [0..20]. Otherwise, NULL.",
extended = "> SELECT _FUNC_(5);\n 120")
case class Factorial(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[DataType] = Seq(IntegerType)
@ -315,8 +360,14 @@ case class Factorial(child: Expression) extends UnaryExpression with ImplicitCas
}
}
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the natural logarithm of x with base e.",
extended = "> SELECT _FUNC_(1);\n 0.0")
case class Log(child: Expression) extends UnaryLogExpression(math.log, "LOG")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the logarithm of x with base 2.",
extended = "> SELECT _FUNC_(2);\n 1.0")
case class Log2(child: Expression)
extends UnaryLogExpression((x: Double) => math.log(x) / math.log(2), "LOG2") {
override def genCode(ctx: CodegenContext, ev: ExprCode): String = {
@ -332,36 +383,72 @@ case class Log2(child: Expression)
}
}
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the logarithm of x with base 10.",
extended = "> SELECT _FUNC_(10);\n 1.0")
case class Log10(child: Expression) extends UnaryLogExpression(math.log10, "LOG10")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns log(1 + x).",
extended = "> SELECT _FUNC_(0);\n 0.0")
case class Log1p(child: Expression) extends UnaryLogExpression(math.log1p, "LOG1P") {
protected override val yAsymptote: Double = -1.0
}
@ExpressionDescription(
usage = "_FUNC_(x, d) - Return the rounded x at d decimal places.",
extended = "> SELECT _FUNC_(12.3456, 1);\n 12.3")
case class Rint(child: Expression) extends UnaryMathExpression(math.rint, "ROUND") {
override def funcName: String = "rint"
}
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the sign of x.",
extended = "> SELECT _FUNC_(40);\n 1.0")
case class Signum(child: Expression) extends UnaryMathExpression(math.signum, "SIGNUM")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the sine of x.",
extended = "> SELECT _FUNC_(0);\n 0.0")
case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the hyperbolic sine of x.",
extended = "> SELECT _FUNC_(0);\n 0.0")
case class Sinh(child: Expression) extends UnaryMathExpression(math.sinh, "SINH")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the square root of x.",
extended = "> SELECT _FUNC_(4);\n 2.0")
case class Sqrt(child: Expression) extends UnaryMathExpression(math.sqrt, "SQRT")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the tangent of x.",
extended = "> SELECT _FUNC_(0);\n 0.0")
case class Tan(child: Expression) extends UnaryMathExpression(math.tan, "TAN")
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the hyperbolic tangent of x.",
extended = "> SELECT _FUNC_(0);\n 0.0")
case class Tanh(child: Expression) extends UnaryMathExpression(math.tanh, "TANH")
@ExpressionDescription(
usage = "_FUNC_(x) - Converts radians to degrees.",
extended = "> SELECT _FUNC_(3.141592653589793);\n 180.0")
case class ToDegrees(child: Expression) extends UnaryMathExpression(math.toDegrees, "DEGREES") {
override def funcName: String = "toDegrees"
}
@ExpressionDescription(
usage = "_FUNC_(x) - Converts degrees to radians.",
extended = "> SELECT _FUNC_(180);\n 3.141592653589793")
case class ToRadians(child: Expression) extends UnaryMathExpression(math.toRadians, "RADIANS") {
override def funcName: String = "toRadians"
}
@ExpressionDescription(
usage = "_FUNC_(x) - Returns x in binary.",
extended = "> SELECT _FUNC_(13);\n '1101'")
case class Bin(child: Expression)
extends UnaryExpression with Serializable with ImplicitCastInputTypes {
@ -453,6 +540,9 @@ object Hex {
* Otherwise if the number is a STRING, it converts each character into its hex representation
* and returns the resulting STRING. Negative numbers would be treated as two's complement.
*/
@ExpressionDescription(
usage = "_FUNC_(x) - Convert the argument to hexadecimal.",
extended = "> SELECT _FUNC_(17);\n '11'\n> SELECT _FUNC_('Spark SQL');\n '537061726B2053514C'")
case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] =
@ -481,6 +571,9 @@ case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInput
* Performs the inverse operation of HEX.
* Resulting characters are returned as a byte array.
*/
@ExpressionDescription(
usage = "_FUNC_(x) - Converts hexadecimal argument to binary.",
extended = "> SELECT decode(_FUNC_('537061726B2053514C'),'UTF-8');\n 'Spark SQL'")
case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
@ -509,7 +602,9 @@ case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInp
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
@ExpressionDescription(
usage = "_FUNC_(x,y) - Returns the arc tangent2.",
extended = "> SELECT _FUNC_(0, 0);\n 0.0")
case class Atan2(left: Expression, right: Expression)
extends BinaryMathExpression(math.atan2, "ATAN2") {
@ -523,6 +618,9 @@ case class Atan2(left: Expression, right: Expression)
}
}
@ExpressionDescription(
usage = "_FUNC_(x1, x2) - Raise x1 to the power of x2.",
extended = "> SELECT _FUNC_(2, 3);\n 8.0")
case class Pow(left: Expression, right: Expression)
extends BinaryMathExpression(math.pow, "POWER") {
override def genCode(ctx: CodegenContext, ev: ExprCode): String = {
@ -532,10 +630,14 @@ case class Pow(left: Expression, right: Expression)
/**
* Bitwise unsigned left shift.
* Bitwise left shift.
*
* @param left the base number to shift.
* @param right number of bits to left shift.
*/
@ExpressionDescription(
usage = "_FUNC_(a, b) - Bitwise left shift.",
extended = "> SELECT _FUNC_(2, 1);\n 4")
case class ShiftLeft(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -558,10 +660,14 @@ case class ShiftLeft(left: Expression, right: Expression)
/**
* Bitwise unsigned left shift.
* Bitwise right shift.
*
* @param left the base number to shift.
* @param right number of bits to left shift.
* @param right number of bits to right shift.
*/
@ExpressionDescription(
usage = "_FUNC_(a, b) - Bitwise right shift.",
extended = "> SELECT _FUNC_(4, 1);\n 2")
case class ShiftRight(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -585,9 +691,13 @@ case class ShiftRight(left: Expression, right: Expression)
/**
* Bitwise unsigned right shift, for integer and long data type.
*
* @param left the base number.
* @param right the number of bits to right shift.
*/
@ExpressionDescription(
usage = "_FUNC_(a, b) - Bitwise unsigned right shift.",
extended = "> SELECT _FUNC_(4, 1);\n 2")
case class ShiftRightUnsigned(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -608,16 +718,22 @@ case class ShiftRightUnsigned(left: Expression, right: Expression)
}
}
@ExpressionDescription(
usage = "_FUNC_(a, b) - Returns sqrt(a**2 + b**2).",
extended = "> SELECT _FUNC_(3, 4);\n 5.0")
case class Hypot(left: Expression, right: Expression)
extends BinaryMathExpression(math.hypot, "HYPOT")
/**
* Computes the logarithm of a number.
*
* @param left the logarithm base, default to e.
* @param right the number to compute the logarithm of.
*/
@ExpressionDescription(
usage = "_FUNC_(b, x) - Returns the logarithm of x with base b.",
extended = "> SELECT _FUNC_(10, 100);\n 2.0")
case class Logarithm(left: Expression, right: Expression)
extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") {
@ -674,6 +790,9 @@ case class Logarithm(left: Expression, right: Expression)
* @param child expr to be round, all [[NumericType]] is allowed as Input
* @param scale new scale to be round to, this should be a constant int at runtime
*/
@ExpressionDescription(
usage = "_FUNC_(x, d) - Round x to d decimal places.",
extended = "> SELECT _FUNC_(12.3456, 1);\n 12.3")
case class Round(child: Expression, scale: Expression)
extends BinaryExpression with ImplicitCastInputTypes {


@ -438,6 +438,8 @@ abstract class InterpretedHashFunction {
* We should use this hash function for both shuffle and bucket, so that we can guarantee shuffle
* and bucketing have same data distribution.
*/
@ExpressionDescription(
usage = "_FUNC_(a1, a2, ...) - Returns a hash value of the arguments.")
case class Murmur3Hash(children: Seq[Expression], seed: Int) extends HashExpression[Int] {
def this(arguments: Seq[Expression]) = this(arguments, 42)


@ -34,6 +34,9 @@ import org.apache.spark.sql.types._
* coalesce(null, null, null) => null
* }}}
*/
@ExpressionDescription(
usage = "_FUNC_(a1, a2, ...) - Returns the first non-null argument if exists. Otherwise, NULL.",
extended = "> SELECT _FUNC_(NULL, 1, NULL);\n 1")
case class Coalesce(children: Seq[Expression]) extends Expression {
/** Coalesce is nullable if all of its children are nullable, or if it has no children. */
@ -89,6 +92,8 @@ case class Coalesce(children: Seq[Expression]) extends Expression {
/**
* Evaluates to `true` iff it's NaN.
*/
@ExpressionDescription(
usage = "_FUNC_(a) - Returns true if a is NaN and false otherwise.")
case class IsNaN(child: Expression) extends UnaryExpression
with Predicate with ImplicitCastInputTypes {
@ -126,6 +131,8 @@ case class IsNaN(child: Expression) extends UnaryExpression
* An Expression evaluates to `left` iff it's not NaN, or evaluates to `right` otherwise.
* This Expression is useful for mapping NaN values to null.
*/
@ExpressionDescription(
usage = "_FUNC_(a,b) - Returns a iff it's not NaN, or b otherwise.")
case class NaNvl(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -180,6 +187,8 @@ case class NaNvl(left: Expression, right: Expression)
/**
* An expression that is evaluated to true if the input is null.
*/
@ExpressionDescription(
usage = "_FUNC_(a) - Returns true if a is NULL and false otherwise.")
case class IsNull(child: Expression) extends UnaryExpression with Predicate {
override def nullable: Boolean = false
@ -201,6 +210,8 @@ case class IsNull(child: Expression) extends UnaryExpression with Predicate {
/**
* An expression that is evaluated to true if the input is not null.
*/
@ExpressionDescription(
usage = "_FUNC_(a) - Returns true if a is not NULL and false otherwise.")
case class IsNotNull(child: Expression) extends UnaryExpression with Predicate {
override def nullable: Boolean = false


@ -88,7 +88,8 @@ trait PredicateHelper {
expr.references.subsetOf(plan.outputSet)
}
@ExpressionDescription(
usage = "_FUNC_ a - Logical not")
case class Not(child: Expression)
extends UnaryExpression with Predicate with ImplicitCastInputTypes with NullIntolerant {
@ -109,6 +110,8 @@ case class Not(child: Expression)
/**
* Evaluates to `true` if `list` contains `value`.
*/
@ExpressionDescription(
usage = "expr _FUNC_(val1, val2, ...) - Returns true if expr equals to any valN.")
case class In(value: Expression, list: Seq[Expression]) extends Predicate
with ImplicitCastInputTypes {
@ -243,6 +246,8 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with
}
}
@ExpressionDescription(
usage = "a _FUNC_ b - Logical AND.")
case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate {
override def inputType: AbstractDataType = BooleanType
@ -306,7 +311,8 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with
}
}
@ExpressionDescription(
usage = "a _FUNC_ b - Logical OR.")
case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate {
override def inputType: AbstractDataType = BooleanType
@ -401,7 +407,8 @@ private[sql] object Equality {
}
}
@ExpressionDescription(
usage = "a _FUNC_ b - Returns TRUE if a equals b and false otherwise.")
case class EqualTo(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
@ -426,7 +433,9 @@ case class EqualTo(left: Expression, right: Expression)
}
}
@ExpressionDescription(
usage = """a _FUNC_ b - Returns same result with EQUAL(=) operator for non-null operands,
but returns TRUE if both are NULL, FALSE if one of them is NULL.""")
case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison {
override def inputType: AbstractDataType = AnyDataType
@ -467,7 +476,8 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp
}
}
@ExpressionDescription(
usage = "a _FUNC_ b - Returns TRUE if a is less than b.")
case class LessThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
@ -480,7 +490,8 @@ case class LessThan(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lt(input1, input2)
}
@ExpressionDescription(
usage = "a _FUNC_ b - Returns TRUE if a is not greater than b.")
case class LessThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
@ -493,7 +504,8 @@ case class LessThanOrEqual(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lteq(input1, input2)
}
@ExpressionDescription(
usage = "a _FUNC_ b - Returns TRUE if a is greater than b.")
case class GreaterThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {
@ -506,7 +518,8 @@ case class GreaterThan(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.gt(input1, input2)
}
@ExpressionDescription(
usage = "a _FUNC_ b - Returns TRUE if a is not smaller than b.")
case class GreaterThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {


@ -55,6 +55,8 @@ abstract class RDG extends LeafExpression with Nondeterministic {
}
/** Generate a random column with i.i.d. uniformly distributed values in [0, 1). */
@ExpressionDescription(
usage = "_FUNC_(a) - Returns a random column with i.i.d. uniformly distributed values in [0, 1).")
case class Rand(seed: Long) extends RDG {
override protected def evalInternal(input: InternalRow): Double = rng.nextDouble()
@ -78,6 +80,8 @@ case class Rand(seed: Long) extends RDG {
}
/** Generate a random column with i.i.d. gaussian random distribution. */
@ExpressionDescription(
usage = "_FUNC_(a) - Returns a random column with i.i.d. gaussian random distribution.")
case class Randn(seed: Long) extends RDG {
override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()


@ -67,6 +67,8 @@ trait StringRegexExpression extends ImplicitCastInputTypes {
/**
* Simple RegEx pattern matching function
*/
@ExpressionDescription(
usage = "str _FUNC_ pattern - Returns true if str matches pattern and false otherwise.")
case class Like(left: Expression, right: Expression)
extends BinaryExpression with StringRegexExpression {
@ -117,7 +119,8 @@ case class Like(left: Expression, right: Expression)
}
}
@ExpressionDescription(
usage = "str _FUNC_ regexp - Returns true if str matches regexp and false otherwise.")
case class RLike(left: Expression, right: Expression)
extends BinaryExpression with StringRegexExpression {
@ -169,6 +172,9 @@ case class RLike(left: Expression, right: Expression)
/**
* Splits str around pat (pattern is a regular expression).
*/
@ExpressionDescription(
usage = "_FUNC_(str, regex) - Splits str around occurrences that match regex",
extended = "> SELECT _FUNC_('oneAtwoBthreeC', '[ABC]');\n ['one', 'two', 'three']")
case class StringSplit(str: Expression, pattern: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -198,6 +204,9 @@ case class StringSplit(str: Expression, pattern: Expression)
*
* NOTE: this expression is not THREAD-SAFE, as it has some internal mutable status.
*/
@ExpressionDescription(
usage = "_FUNC_(str, regexp, rep) - replace all substrings of str that match regexp with rep.",
extended = "> SELECT _FUNC_('100-200', '(\\d+)', 'num');\n 'num-num'")
case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@ -289,6 +298,9 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
*
* NOTE: this expression is not THREAD-SAFE, as it has some internal mutable status.
*/
@ExpressionDescription(
usage = "_FUNC_(str, regexp[, idx]) - extracts a group that matches regexp.",
extended = "> SELECT _FUNC_('100-200', '(\\d+)-(\\d+)', 1);\n '100'")
case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
def this(s: Expression, r: Expression) = this(s, r, Literal(1))


@ -35,6 +35,9 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String}
* An expression that concatenates multiple input strings into a single string.
* If any input is null, concat returns null.
*/
@ExpressionDescription(
usage = "_FUNC_(str1, str2, ..., strN) - Returns the concatenation of str1, str2, ..., strN",
extended = "> SELECT _FUNC_('Spark','SQL');\n 'SparkSQL'")
case class Concat(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes {
override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
@ -70,6 +73,10 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas
*
* Returns null if the separator is null. Otherwise, concat_ws skips all null values.
*/
@ExpressionDescription(
usage =
"_FUNC_(sep, [str | array(str)]+) - Returns the concatenation of the strings separated by sep.",
extended = "> SELECT _FUNC_(' ', Spark', 'SQL');\n 'Spark SQL'")
case class ConcatWs(children: Seq[Expression])
extends Expression with ImplicitCastInputTypes {
@ -188,7 +195,7 @@ case class Upper(child: Expression)
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Returns str with all characters changed to lowercase",
extended = "> SELECT _FUNC_('SparkSql');\n'sparksql'")
extended = "> SELECT _FUNC_('SparkSql');\n 'sparksql'")
case class Lower(child: Expression) extends UnaryExpression with String2StringExpression {
override def convert(v: UTF8String): UTF8String = v.toLowerCase
@ -270,6 +277,11 @@ object StringTranslate {
* The translate will happen when any character in the string matching with the character
* in the `matchingExpr`.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """_FUNC_(input, from, to) - Translates the input string by replacing the characters present in the from string with the corresponding characters in the to string""",
extended = "> SELECT _FUNC_('AaBbCc', 'abc', '123');\n 'A1B2C3'")
// scalastyle:on line.size.limit
case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replaceExpr: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@ -325,6 +337,12 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac
* delimited list (right). Returns 0, if the string wasn't found or if the given
* string (left) contains a comma.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """_FUNC_(str, str_array) - Returns the index (1-based) of the given string (left) in the comma-delimited list (right).
Returns 0, if the string wasn't found or if the given string (left) contains a comma.""",
extended = "> SELECT _FUNC_('ab','abc,b,ab,c,def');\n 3")
// scalastyle:on
case class FindInSet(left: Expression, right: Expression) extends BinaryExpression
with ImplicitCastInputTypes {
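A hedged spark-shell check of the translate and find_in_set examples documented above (same assumption about `sql` being in scope):
```
// translate maps 'a'->'1', 'b'->'2', 'c'->'3'; find_in_set is 1-based.
sql("SELECT translate('AaBbCc', 'abc', '123')").collect().foreach(println)
// expected: [A1B2C3]
sql("SELECT find_in_set('ab', 'abc,b,ab,c,def')").collect().foreach(println)
// expected: [3]
```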
@ -347,6 +365,9 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi
/**
 * A function that trims the spaces from both ends of the specified string.
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Removes the leading and trailing space characters from str.",
extended = "> SELECT _FUNC_(' SparkSQL ');\n 'SparkSQL'")
case class StringTrim(child: Expression)
extends UnaryExpression with String2StringExpression {
@ -362,6 +383,9 @@ case class StringTrim(child: Expression)
/**
 * A function that trims the spaces from the left end of the given string.
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Removes the leading space characters from str.",
extended = "> SELECT _FUNC_(' SparkSQL ');\n 'SparkSQL '")
case class StringTrimLeft(child: Expression)
extends UnaryExpression with String2StringExpression {
@ -377,6 +401,9 @@ case class StringTrimLeft(child: Expression)
/**
 * A function that trims the spaces from the right end of the given string.
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Removes the trailing space characters from str.",
extended = "> SELECT _FUNC_(' SparkSQL ');\n ' SparkSQL'")
case class StringTrimRight(child: Expression)
extends UnaryExpression with String2StringExpression {
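The three trim variants can be compared side by side; a sketch under the same spark-shell assumption (the differing leading/trailing spaces are easiest to see in the per-column comment):
```
// trim removes both sides, ltrim only leading spaces, rtrim only trailing spaces.
sql("SELECT trim(' SparkSQL '), ltrim(' SparkSQL '), rtrim(' SparkSQL ')").collect().foreach(println)
// expected columns: 'SparkSQL', 'SparkSQL ', ' SparkSQL'
```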
@ -396,6 +423,9 @@ case class StringTrimRight(child: Expression)
*
 * NOTE: this is a 1-based index, not a zero-based one. The first character in str has index 1.
*/
@ExpressionDescription(
usage = "_FUNC_(str, substr) - Returns the (1-based) index of the first occurrence of substr in str.",
extended = "> SELECT _FUNC_('SparkSQL', 'SQL');\n 6")
case class StringInstr(str: Expression, substr: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -422,6 +452,15 @@ case class StringInstr(str: Expression, substr: Expression)
 * returned. If count is negative, everything to the right of the final delimiter (counting from the
* right) is returned. substring_index performs a case-sensitive match when searching for delim.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """_FUNC_(str, delim, count) - Returns the substring from str before count occurrences of the delimiter delim.
If count is positive, everything to the left of the final delimiter (counting from the
left) is returned. If count is negative, everything to the right of the final delimiter
(counting from the right) is returned. Substring_index performs a case-sensitive match
when searching for delim.""",
extended = "> SELECT _FUNC_('www.apache.org', '.', 2);\n 'www.apache'")
// scalastyle:on line.size.limit
case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
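For substring_index, the negative-count branch described above is worth exercising alongside the documented positive case; a sketch (the expected values follow the MySQL-style semantics spelled out in the usage string):
```
// Positive count keeps everything left of the Nth delimiter; negative counts from the right.
sql("SELECT substring_index('www.apache.org', '.', 2)").collect().foreach(println)
// expected: [www.apache]
sql("SELECT substring_index('www.apache.org', '.', -2)").collect().foreach(println)
// expected: [apache.org]
```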
@ -445,6 +484,12 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr:
* A function that returns the position of the first occurrence of substr
 * in the given string after position pos.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """_FUNC_(substr, str[, pos]) - Returns the position of the first occurrence of substr in str after position pos.
The given pos and return value are 1-based.""",
extended = "> SELECT _FUNC_('bar', 'foobarbar', 5);\n 7")
// scalastyle:on line.size.limit
case class StringLocate(substr: Expression, str: Expression, start: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
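instr and locate share the 1-based convention noted above; a small spark-shell sketch replaying the documented values:
```
// instr finds 'SQL' starting at index 6; locate starts searching at position 5.
sql("SELECT instr('SparkSQL', 'SQL'), locate('bar', 'foobarbar', 5)").collect().foreach(println)
// expected: [6,7]
```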
@ -510,6 +555,11 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression)
/**
* Returns str, left-padded with pad to a length of len.
*/
@ExpressionDescription(
usage = """_FUNC_(str, len, pad) - Returns str, left-padded with pad to a length of len.
If str is longer than len, the return value is shortened to len characters.""",
extended = "> SELECT _FUNC_('hi', 5, '??');\n '???hi'\n" +
"> SELECT _FUNC_('hi', 1, '??');\n 'h'")
case class StringLPad(str: Expression, len: Expression, pad: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
@ -531,6 +581,11 @@ case class StringLPad(str: Expression, len: Expression, pad: Expression)
/**
* Returns str, right-padded with pad to a length of len.
*/
@ExpressionDescription(
usage = """_FUNC_(str, len, pad) - Returns str, right-padded with pad to a length of len.
If str is longer than len, the return value is shortened to len characters.""",
extended = "> SELECT _FUNC_('hi', 5, '??');\n 'hi???'\n" +
"> SELECT _FUNC_('hi', 1, '??');\n 'h'")
case class StringRPad(str: Expression, len: Expression, pad: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
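The lpad/rpad truncation behavior when str is already longer than len can be confirmed together with the padding case; a sketch under the same shell assumption:
```
// Padding on the left, padding on the right, and truncation to len = 1.
sql("SELECT lpad('hi', 5, '??'), rpad('hi', 5, '??'), lpad('hi', 1, '??')").collect().foreach(println)
// expected: [???hi,hi???,h]
```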
@ -552,6 +607,11 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression)
/**
 * Returns the input formatted according to printf-style format strings.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(String format, Obj... args) - Returns a formatted string from printf-style format strings.",
extended = "> SELECT _FUNC_(\"Hello World %d %s\", 100, \"days\");\n 'Hello World 100 days'")
// scalastyle:on line.size.limit
case class FormatString(children: Expression*) extends Expression with ImplicitCastInputTypes {
require(children.nonEmpty, "format_string() should take at least 1 argument")
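A minimal check of the printf-style formatting example, assuming the same spark-shell session:
```
// %d consumes the integer argument, %s the string argument.
sql("SELECT format_string('Hello World %d %s', 100, 'days')").collect().foreach(println)
// expected: [Hello World 100 days]
```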
@ -642,6 +702,9 @@ case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastI
/**
 * Returns the string which repeats the given string value n times.
*/
@ExpressionDescription(
usage = "_FUNC_(str, n) - Returns the string which repeat the given string value n times.",
extended = "> SELECT _FUNC_('123', 2);\n '123123'")
case class StringRepeat(str: Expression, times: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -664,6 +727,9 @@ case class StringRepeat(str: Expression, times: Expression)
/**
 * Returns the given string with the characters in reverse order.
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Returns the reversed given string.",
extended = "> SELECT _FUNC_('Spark SQL');\n 'LQS krapS'")
case class StringReverse(child: Expression) extends UnaryExpression with String2StringExpression {
override def convert(v: UTF8String): UTF8String = v.reverse()
@ -677,6 +743,9 @@ case class StringReverse(child: Expression) extends UnaryExpression with String2
/**
 * Returns a string consisting of n spaces.
*/
@ExpressionDescription(
usage = "_FUNC_(n) - Returns a n spaces string.",
extended = "> SELECT _FUNC_(2);\n ' '")
case class StringSpace(child: Expression)
extends UnaryExpression with ImplicitCastInputTypes {
@ -699,7 +768,14 @@ case class StringSpace(child: Expression)
/**
* A function that takes a substring of its first argument starting at a given position.
* Defined for String and Binary types.
*
 * NOTE: this is a 1-based index, not a zero-based one. The first character in str has index 1.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(str, pos[, len]) - Returns the substring of str that starts at pos and is of length len or the slice of byte array that starts at pos and is of length len.",
extended = "> SELECT _FUNC_('Spark SQL', 5);\n 'k SQL'\n> SELECT _FUNC_('Spark SQL', -3);\n 'SQL'\n> SELECT _FUNC_('Spark SQL', 5, 1);\n 'k'")
// scalastyle:on line.size.limit
case class Substring(str: Expression, pos: Expression, len: Expression)
extends TernaryExpression with ImplicitCastInputTypes {
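The substring examples above, including the negative start position, can be replayed in one query; a sketch assuming the usual spark-shell setup:
```
// Positive start, negative start (counted from the end), and an explicit length.
sql("SELECT substring('Spark SQL', 5), substring('Spark SQL', -3), substring('Spark SQL', 5, 1)").collect().foreach(println)
// expected: [k SQL,SQL,k]
```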
@ -737,6 +813,9 @@ case class Substring(str: Expression, pos: Expression, len: Expression)
/**
 * A function that returns the length of the given string or binary expression.
*/
@ExpressionDescription(
usage = "_FUNC_(str | binary) - Returns the length of str or number of bytes in binary data.",
extended = "> SELECT _FUNC_('Spark SQL');\n 9")
case class Length(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))
@ -757,6 +836,9 @@ case class Length(child: Expression) extends UnaryExpression with ExpectsInputTy
/**
 * A function that returns the Levenshtein distance between the two given strings.
*/
@ExpressionDescription(
usage = "_FUNC_(str1, str2) - Returns the Levenshtein distance between the two given strings.",
extended = "> SELECT _FUNC_('kitten', 'sitting');\n 3")
case class Levenshtein(left: Expression, right: Expression) extends BinaryExpression
with ImplicitCastInputTypes {
@ -775,6 +857,9 @@ case class Levenshtein(left: Expression, right: Expression) extends BinaryExpres
/**
 * A function that returns the soundex code of the given string expression.
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Returns soundex code of the string.",
extended = "> SELECT _FUNC_('Miller');\n 'M460'")
case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = StringType
@ -791,6 +876,10 @@ case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputT
/**
* Returns the numeric value of the first character of str.
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Returns the numeric value of the first character of str.",
extended = "> SELECT _FUNC_('222');\n 50\n" +
"> SELECT _FUNC_(2);\n 50")
case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = IntegerType
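ascii on a string versus a non-string argument (implicitly cast), matching the two documented examples; a sketch:
```
// Both arguments resolve to the character '2', whose code point is 50.
sql("SELECT ascii('222'), ascii(2)").collect().foreach(println)
// expected: [50,50]
```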
@ -822,6 +911,8 @@ case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInp
/**
* Converts the argument from binary to a base 64 string.
*/
@ExpressionDescription(
usage = "_FUNC_(bin) - Convert the argument from binary to a base 64 string.")
case class Base64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = StringType
@ -844,6 +935,8 @@ case class Base64(child: Expression) extends UnaryExpression with ImplicitCastIn
/**
* Converts the argument from a base 64 string to BINARY.
*/
@ExpressionDescription(
usage = "_FUNC_(str) - Convert the argument from a base 64 string to binary.")
case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = BinaryType
@ -865,6 +958,8 @@ case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCast
* (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
* If either argument is null, the result will also be null.
*/
@ExpressionDescription(
usage = "_FUNC_(bin, str) - Decode the first argument using the second argument character set.")
case class Decode(bin: Expression, charset: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -894,7 +989,9 @@ case class Decode(bin: Expression, charset: Expression)
* Encodes the first argument into a BINARY using the provided character set
* (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
* If either argument is null, the result will also be null.
*/
*/
@ExpressionDescription(
usage = "_FUNC_(str, str) - Encode the first argument using the second argument character set.")
case class Encode(value: Expression, charset: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
@ -924,6 +1021,11 @@ case class Encode(value: Expression, charset: Expression)
* and returns the result as a string. If D is 0, the result has no decimal point or
* fractional part.
*/
@ExpressionDescription(
usage = """_FUNC_(X, D) - Formats the number X like '#,###,###.##', rounded to D decimal places.
If D is 0, the result has no decimal point or fractional part.
This is supposed to function like MySQL's FORMAT.""",
extended = "> SELECT _FUNC_(12332.123456, 4);\n '12,332.1235'")
case class FormatNumber(x: Expression, d: Expression)
extends BinaryExpression with ExpectsInputTypes {
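A quick check of the format_number rounding example; a sketch assuming the same spark-shell session:
```
// Rounds to 4 decimal places and inserts thousands separators.
sql("SELECT format_number(12332.123456, 4)").collect().foreach(println)
// expected: [12,332.1235]
```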

View file

@ -89,6 +89,14 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
"Function: abcadf not found.")
}
test("SPARK-14415: All functions should have own descriptions") {
for (f <- sqlContext.sessionState.functionRegistry.listFunction()) {
if (!Seq("cube", "grouping", "grouping_id", "rollup", "window").contains(f)) {
checkExistence(sql(s"describe function `$f`"), false, "To be added.")
}
}
}
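Outside the test suite, a similar sweep can be sketched interactively. This is only an illustration: it assumes `sql` is in scope and that SHOW FUNCTIONS returns a single string column of function names, neither of which is part of this patch.
```
// Lists functions whose DESCRIBE output still contains the "To be added." placeholder.
val exempt = Set("cube", "grouping", "grouping_id", "rollup", "window")
val missing = sql("SHOW FUNCTIONS").collect().map(_.getString(0))
  .filterNot(exempt)
  .filter(f => sql(s"DESCRIBE FUNCTION `$f`").collect().mkString.contains("To be added."))
println(if (missing.isEmpty) "all functions documented" else missing.mkString(", "))
```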
test("SPARK-6743: no columns from cache") {
Seq(
(83, 0, 38),

View file

@ -238,7 +238,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
checkExistence(sql("describe functioN `~`"), true,
"Function: ~",
"Class: org.apache.spark.sql.catalyst.expressions.BitwiseNot",
"Usage: To be added.")
"Usage: ~ b - Bitwise NOT.")
// Hard coded describe functions
checkExistence(sql("describe function `<>`"), true,