diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 7521a7e124..a4c7f7a8de 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -276,6 +276,8 @@ object FunctionRegistry {
 
     // string functions
     expression[Ascii]("ascii"),
+    expression[Chr]("char"),
+    expression[Chr]("chr"),
     expression[Base64]("base64"),
     expression[Concat]("concat"),
     expression[ConcatWs]("concat_ws"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 5598a14699..aba2f5f81f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1267,6 +1267,50 @@ case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInp
   }
 }
 
+/**
+ * Returns the character whose code is the low-order byte of the input, so any non-negative n
+ * gives the same result as chr(n % 256). Negative input yields an empty string.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(expr) - Returns the ASCII character having the binary equivalent to `expr`. If `expr` is larger than or equal to 256 the result is equivalent to _FUNC_(`expr` % 256). If `expr` is negative the result is an empty string.",
+  extended = """
+    Examples:
+      > SELECT _FUNC_(65);
+       A
+  """)
+// scalastyle:on line.size.limit
+case class Chr(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def dataType: DataType = StringType
+  override def inputTypes: Seq[DataType] = Seq(LongType)
+
+  protected override def nullSafeEval(lon: Any): Any = {
+    val longVal = lon.asInstanceOf[Long]
+    if (longVal < 0) {
+      UTF8String.EMPTY_UTF8
+    } else {
+      // Masking keeps only the low byte; a zero low byte converts to '\u0000', which is
+      // Character.MIN_VALUE, so it needs no branch of its own.
+      UTF8String.fromString((longVal & 0xFF).toChar.toString)
+    }
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    // The generated Java mirrors nullSafeEval: empty string for negatives, low byte otherwise.
+    nullSafeCodeGen(ctx, ev, lon => {
+      s"""
+        if ($lon < 0) {
+          ${ev.value} = UTF8String.EMPTY_UTF8;
+        } else {
+          char c = (char)($lon & 0xFF);
+          ${ev.value} = UTF8String.fromString(String.valueOf(c));
+        }
+      """
+    })
+  }
+}
+
 /**
  * Converts the argument from binary to a base 64 string.
  */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 26978a0482..9ae438d568 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -263,6 +263,19 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(Ascii(Literal.create(null, StringType)), null, create_row("abdef"))
   }
 
+  test("string for ascii") {
+    val a = 'a.long.at(0)
+    checkEvaluation(Chr(Literal(48L)), "0", create_row("abdef"))
+    checkEvaluation(Chr(a), "a", create_row(97L))
+    checkEvaluation(Chr(a), "a", create_row(97L + 256L))
+    checkEvaluation(Chr(a), "", create_row(-9L))
+    checkEvaluation(Chr(a), Character.MIN_VALUE.toString, create_row(0L))
+    checkEvaluation(Chr(a), Character.MIN_VALUE.toString, create_row(256L))
+    checkEvaluation(Chr(a), null, create_row(null))
+    checkEvaluation(Chr(a), 149.toChar.toString, create_row(149L))
+    checkEvaluation(Chr(Literal.create(null, LongType)), null, create_row("abdef"))
+  }
+
   test("base64/unbase64 for string") {
     val a = 'a.string.at(0)
     val b = 'b.binary.at(0)