[SPARK-20748][SQL] Add built-in SQL function CH[A]R.

## What changes were proposed in this pull request?
Add built-in SQL function `CH[A]R`:
For `CHR(bigint|double n)`, returns the ASCII character having the binary equivalent to `n`. If `n` is 256 or larger, the result is equivalent to `CHR(n % 256)`; if `n` is negative, the result is an empty string.

## How was this patch tested?
unit tests

Author: Yuming Wang <wgyumg@gmail.com>

Closes #18019 from wangyum/SPARK-20748.
This commit is contained in:
Yuming Wang 2017-05-26 20:59:14 -07:00 committed by Xiao Li
parent 1d62f8aca8
commit a0f8a072e3
3 changed files with 60 additions and 0 deletions

View file

@ -276,6 +276,8 @@ object FunctionRegistry {
// string functions // string functions
expression[Ascii]("ascii"), expression[Ascii]("ascii"),
expression[Chr]("char"),
expression[Chr]("chr"),
expression[Base64]("base64"), expression[Base64]("base64"),
expression[Concat]("concat"), expression[Concat]("concat"),
expression[ConcatWs]("concat_ws"), expression[ConcatWs]("concat_ws"),

View file

@ -1267,6 +1267,51 @@ case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInp
} }
} }
/**
 * Returns the one-character string whose character code is the low 8 bits of the input `n`.
 *
 *  - A negative `n` yields the empty string.
 *  - Otherwise the result is the character with code `n & 0xFF`. Any `n` of 256 or more
 *    therefore gives the same result as `chr(n % 256)`; in particular `chr(0)` and `chr(256)`
 *    both return the NUL character (`Character.MIN_VALUE`).
 *
 * Only the non-null path is implemented here; null handling is left to the null-safe
 * evaluation/codegen hooks (`nullSafeEval` / `nullSafeCodeGen`) that this class plugs into.
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(expr) - Returns the ASCII character having the binary equivalent to `expr`. If `expr` is larger than or equal to 256 the result is equivalent to chr(`expr` % 256). If `expr` is negative the result is an empty string.",
  extended = """
    Examples:
      > SELECT _FUNC_(65);
       A
  """)
// scalastyle:on line.size.limit
case class Chr(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

  override def dataType: DataType = StringType

  // The argument is implicitly cast to a long before evaluation.
  override def inputTypes: Seq[DataType] = Seq(LongType)

  /**
   * Interpreted evaluation for a non-null input.
   *
   * @param lon the input value, expected to be a `Long`
   * @return an empty `UTF8String` for negative input, otherwise the single character
   *         whose code is `lon & 0xFF`
   */
  protected override def nullSafeEval(lon: Any): Any = {
    val longVal = lon.asInstanceOf[Long]
    if (longVal < 0) {
      UTF8String.EMPTY_UTF8
    } else {
      // Masking keeps only the low byte, which is what makes chr(n) == chr(n % 256) for
      // n >= 256. A masked value of 0 converts to '\u0000' (Character.MIN_VALUE) on its own,
      // so no dedicated zero branch is needed.
      UTF8String.fromString((longVal & 0xFF).toChar.toString)
    }
  }

  /**
   * Generated-code counterpart of [[nullSafeEval]]; the emitted Java mirrors the same two
   * branches (negative input, masked low byte).
   */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(ctx, ev, lon => {
      s"""
        if ($lon < 0) {
          ${ev.value} = UTF8String.EMPTY_UTF8;
        } else {
          char c = (char)($lon & 0xFF);
          ${ev.value} = UTF8String.fromString(String.valueOf(c));
        }
      """
    })
  }
}
/** /**
* Converts the argument from binary to a base 64 string. * Converts the argument from binary to a base 64 string.
*/ */

View file

@ -263,6 +263,19 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(Ascii(Literal.create(null, StringType)), null, create_row("abdef")) checkEvaluation(Ascii(Literal.create(null, StringType)), null, create_row("abdef"))
} }
test("string for ascii") {
  // Bound reference to column 0 of the input row, typed as a long.
  val longCol = 'a.long.at(0)
  val nulChar = Character.MIN_VALUE.toString

  // A literal argument: the contents of the input row are irrelevant here.
  checkEvaluation(Chr(Literal(48L)), "0", create_row("abdef"))

  // (value placed in the row, expected result) pairs, checked in this order.
  val rowCases: Seq[(Any, Any)] = Seq(
    (97L, "a"),
    (97L + 256L, "a"),
    (-9L, ""),
    (0L, nulChar),
    (256L, nulChar),
    (null, null),
    (149L, 149.toChar.toString))
  rowCases.foreach { case (rowValue, expected) =>
    checkEvaluation(Chr(longCol), expected, create_row(rowValue))
  }

  // A typed null literal evaluates to null as well.
  checkEvaluation(Chr(Literal.create(null, LongType)), null, create_row("abdef"))
}
test("base64/unbase64 for string") { test("base64/unbase64 for string") {
val a = 'a.string.at(0) val a = 'a.string.at(0)
val b = 'b.binary.at(0) val b = 'b.binary.at(0)