[SPARK-23901][SQL] Add masking functions

## What changes were proposed in this pull request?

The PR adds the masking functions as they are described in Hive's documentation: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-DataMaskingFunctions.
This means that only `string`s are accepted as parameters for the masking functions.

## How was this patch tested?

added UTs

Author: Marco Gaido <marcogaido91@gmail.com>

Closes #21246 from mgaido91/SPARK-23901.
This commit is contained in:
Marco Gaido 2018-05-30 11:18:04 -07:00 committed by Takuya UESHIN
parent ec6f971dc5
commit 1b36f14889
6 changed files with 1119 additions and 0 deletions

View file

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.expressions;
/**
 * Static helpers shared by the masking expressions.
 */
public class MaskExpressionsUtils {
  /** Sentinel replacement value: characters of the corresponding category are left unmasked. */
  static final int UNMASKED_VAL = -1;

  /**
   * Returns the masking character for {@code c}, or {@code c} itself if it should not be masked.
   *
   * The replacement is selected from the Unicode general category reported by
   * {@link Character#getType(int)}. A replacement equal to {@code UNMASKED_VAL} disables masking
   * for that category.
   *
   * @param c the code point to transform
   * @param maskedUpperChar the code point to use instead of an uppercase letter
   * @param maskedLowerChar the code point to use instead of a lowercase letter
   * @param maskedDigitChar the code point to use instead of a decimal digit
   * @param maskedOtherChar the code point to use instead of any other character
   * @return the masking code point for {@code c}, or {@code c} when its category is unmasked
   */
  public static int transformChar(
      final int c,
      int maskedUpperChar,
      int maskedLowerChar,
      int maskedDigitChar,
      int maskedOtherChar) {
    final int category = Character.getType(c);
    final int replacement;
    if (category == Character.UPPERCASE_LETTER) {
      replacement = maskedUpperChar;
    } else if (category == Character.LOWERCASE_LETTER) {
      replacement = maskedLowerChar;
    } else if (category == Character.DECIMAL_DIGIT_NUMBER) {
      replacement = maskedDigitChar;
    } else {
      replacement = maskedOtherChar;
    }
    return replacement == UNMASKED_VAL ? c : replacement;
  }

  /**
   * Returns the replacement code point to use: the first code point of {@code rep} when the user
   * supplied a non-empty string, otherwise the default {@code def}.
   *
   * @param rep the user-specified replacement string; may be null or empty
   * @param def the default replacement code point
   * @return the first code point of {@code rep}, or {@code def} if {@code rep} is null or empty
   */
  public static int getReplacementChar(String rep, int def) {
    return (rep == null || rep.isEmpty()) ? def : rep.codePointAt(0);
  }
}

View file

@ -432,6 +432,14 @@ object FunctionRegistry {
expression[ArrayRepeat]("array_repeat"), expression[ArrayRepeat]("array_repeat"),
CreateStruct.registryEntry, CreateStruct.registryEntry,
// mask functions
expression[Mask]("mask"),
expression[MaskFirstN]("mask_first_n"),
expression[MaskLastN]("mask_last_n"),
expression[MaskShowFirstN]("mask_show_first_n"),
expression[MaskShowLastN]("mask_show_last_n"),
expression[MaskHash]("mask_hash"),
// misc functions // misc functions
expression[AssertTrue]("assert_true"), expression[AssertTrue]("assert_true"),
expression[Crc32]("crc32"), expression[Crc32]("crc32"),

View file

@ -0,0 +1,569 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.expressions
import org.apache.commons.codec.digest.DigestUtils
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.MaskExpressionsUtils._
import org.apache.spark.sql.catalyst.expressions.MaskLike._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
/**
 * Behaviour shared by the masking expressions: resolves the replacement code points from the
 * user-supplied strings and provides helpers (interpreted and code-generating) that copy a run
 * of code points from the input into a `StringBuilder`, either masked or unchanged.
 */
trait MaskLike {
  /** User-supplied replacement for uppercase letters; null or empty selects the default. */
  def upper: String
  /** User-supplied replacement for lowercase letters; null or empty selects the default. */
  def lower: String
  /** User-supplied replacement for digits; null or empty selects the default. */
  def digit: String

  protected lazy val upperReplacement: Int = getReplacementChar(upper, defaultMaskedUppercase)
  protected lazy val lowerReplacement: Int = getReplacementChar(lower, defaultMaskedLowercase)
  protected lazy val digitReplacement: Int = getReplacementChar(digit, defaultMaskedDigit)

  /** Fully qualified name of the Java helper class, spliced into the generated code. */
  protected val maskUtilsClassName: String = classOf[MaskExpressionsUtils].getName

  /**
   * Generates a Java statement declaring `length` as the number of code points in `inputString`.
   */
  def inputStringLengthCode(inputString: String, length: String): String = {
    val javaInt = CodeGenerator.JAVA_INT
    s"$javaInt $length = $inputString.codePointCount(0, $inputString.length());"
  }

  /**
   * Generates a Java loop that appends `numChars` masked code points of `inputString` to `sb`,
   * starting at the char index held in the generated variable `offset` and advancing it.
   */
  def appendMaskedToStringBuilderCode(
      ctx: CodegenContext,
      sb: String,
      inputString: String,
      offset: String,
      numChars: String): String = {
    val loopVar = ctx.freshName("i")
    val cp = ctx.freshName("codePoint")
    val javaInt = CodeGenerator.JAVA_INT
    s"""
       |for ($javaInt $loopVar = 0; $loopVar < $numChars; $loopVar++) {
       | $javaInt $cp = $inputString.codePointAt($offset);
       | $sb.appendCodePoint($maskUtilsClassName.transformChar($cp,
       | $upperReplacement, $lowerReplacement,
       | $digitReplacement, $defaultMaskedOther));
       | $offset += Character.charCount($cp);
       |}
       |""".stripMargin
  }

  /**
   * Generates a Java loop that appends `numChars` code points of `inputString` to `sb` as they
   * are, starting at the char index held in the generated variable `offset` and advancing it.
   */
  def appendUnchangedToStringBuilderCode(
      ctx: CodegenContext,
      sb: String,
      inputString: String,
      offset: String,
      numChars: String): String = {
    val loopVar = ctx.freshName("i")
    val cp = ctx.freshName("codePoint")
    val javaInt = CodeGenerator.JAVA_INT
    s"""
       |for ($javaInt $loopVar = 0; $loopVar < $numChars; $loopVar++) {
       | $javaInt $cp = $inputString.codePointAt($offset);
       | $sb.appendCodePoint($cp);
       | $offset += Character.charCount($cp);
       |}
       |""".stripMargin
  }

  /**
   * Appends `numChars` masked code points of `inputString` to `sb`, reading from the char index
   * `startOffset`.
   *
   * @return the char index just past the last code point consumed
   */
  def appendMaskedToStringBuilder(
      sb: java.lang.StringBuilder,
      inputString: String,
      startOffset: Int,
      numChars: Int): Int = {
    var position = startOffset
    var remaining = numChars
    while (remaining > 0) {
      val original = inputString.codePointAt(position)
      val masked = transformChar(
        original,
        upperReplacement,
        lowerReplacement,
        digitReplacement,
        defaultMaskedOther)
      sb.appendCodePoint(masked)
      // A supplementary code point occupies two chars, so advance by its char count.
      position += Character.charCount(original)
      remaining -= 1
    }
    position
  }

  /**
   * Appends `numChars` code points of `inputString` to `sb` without masking them, reading from
   * the char index `startOffset`.
   *
   * @return the char index just past the last code point consumed
   */
  def appendUnchangedToStringBuilder(
      sb: java.lang.StringBuilder,
      inputString: String,
      startOffset: Int,
      numChars: Int): Int = {
    var position = startOffset
    var remaining = numChars
    while (remaining > 0) {
      val original = inputString.codePointAt(position)
      sb.appendCodePoint(original)
      position += Character.charCount(original)
      remaining -= 1
    }
    position
  }
}
/**
 * A [[MaskLike]] expression that additionally takes a character count `n`.
 */
trait MaskLikeWithN extends MaskLike {
  /** Requested number of characters; may be negative. */
  def n: Int
  /** `n` clamped at zero, so a negative request behaves like 0. */
  protected lazy val charCount: Int = math.max(n, 0)
}
/**
 * Defaults for the mask operations, plus helpers that turn the literal arguments of the SQL
 * functions into plain Scala values.
 */
object MaskLike {
  val defaultCharCount = 4
  val defaultMaskedUppercase: Int = 'X'
  val defaultMaskedLowercase: Int = 'x'
  val defaultMaskedDigit: Int = 'n'
  val defaultMaskedOther: Int = MaskExpressionsUtils.UNMASKED_VAL

  /**
   * Extracts the character count from an integer literal. A null literal yields
   * `defaultCharCount`; anything other than an integer/null literal is rejected with an
   * `AnalysisException`.
   */
  def extractCharCount(e: Expression): Int = e match {
    case Literal(value, IntegerType | NullType) =>
      Option(value).fold(defaultCharCount)(_.asInstanceOf[Int])
    case Literal(_, actual) => rejectLiteralType(IntegerType, actual)
    case nonLiteral => rejectNonLiteral(nonLiteral)
  }

  /**
   * Extracts the replacement string from a string literal. A null literal yields null; anything
   * other than a string/null literal is rejected with an `AnalysisException`.
   */
  def extractReplacement(e: Expression): String = e match {
    case Literal(value, StringType | NullType) => Option(value).map(_.toString).orNull
    case Literal(_, actual) => rejectLiteralType(StringType, actual)
    case nonLiteral => rejectNonLiteral(nonLiteral)
  }

  /** Fails analysis because a literal of the wrong type was supplied. */
  private def rejectLiteralType(expected: DataType, actual: DataType): Nothing = {
    throw new AnalysisException("Expected literal expression of type " +
      s"${expected.simpleString}, but got literal of ${actual.simpleString}")
  }

  /** Fails analysis because the argument is not a literal at all. */
  private def rejectNonLiteral(other: Expression): Nothing = {
    throw new AnalysisException(s"Expected literal expression, but got ${other.sql}")
  }
}
/**
 * Masks the input string. Additional parameters can be set to change the masking chars for
 * uppercase letters, lowercase letters and digits.
 *
 * @param child expression producing the string to mask
 * @param upper replacement for uppercase letters; null or empty selects the default, and only
 *              the first code point is used
 * @param lower replacement for lowercase letters, with the same rules as `upper`
 * @param digit replacement for digits, with the same rules as `upper`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(str[, upper[, lower[, digit]]]) - Masks str. By default, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the second argument controls the mask character for upper case letters, the third argument for lower case letters and the fourth argument for numbers.",
  examples = """
Examples:
> SELECT _FUNC_("abcd-EFGH-8765-4321", "U", "l", "#");
llll-UUUU-####-####
""", since = "2.4.0")
// scalastyle:on line.size.limit
case class Mask(child: Expression, upper: String, lower: String, digit: String)
  extends UnaryExpression with ExpectsInputTypes with MaskLike {

  // The constructors below are the SQL entry points: replacement arguments arrive as literal
  // expressions and are converted to strings by `extractReplacement`.
  // The cast on the first null disambiguates this call from the Expression-based overloads.
  def this(child: Expression) = this(child, null.asInstanceOf[String], null, null)

  def this(child: Expression, upper: Expression) =
    this(child, extractReplacement(upper), null, null)

  def this(child: Expression, upper: Expression, lower: Expression) =
    this(child, extractReplacement(upper), extractReplacement(lower), null)

  def this(child: Expression, upper: Expression, lower: Expression, digit: Expression) =
    this(child, extractReplacement(upper), extractReplacement(lower), extractReplacement(digit))

  /** Masks every code point of the (non-null) input string. */
  override def nullSafeEval(input: Any): Any = {
    val str = input.asInstanceOf[UTF8String].toString
    // Lengths are counted in code points so supplementary characters are handled as one unit.
    val length = str.codePointCount(0, str.length())
    val sb = new java.lang.StringBuilder(length)
    appendMaskedToStringBuilder(sb, str, 0, length)
    UTF8String.fromString(sb.toString)
  }

  /** Generates Java code equivalent to `nullSafeEval`. */
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(ctx, ev, (input: String) => {
      val sb = ctx.freshName("sb")
      val length = ctx.freshName("length")
      val offset = ctx.freshName("offset")
      val inputString = ctx.freshName("inputString")
      s"""
         |String $inputString = $input.toString();
         |${inputStringLengthCode(inputString, length)}
         |StringBuilder $sb = new StringBuilder($length);
         |${CodeGenerator.JAVA_INT} $offset = 0;
         |${appendMaskedToStringBuilderCode(ctx, sb, inputString, offset, length)}
         |${ev.value} = UTF8String.fromString($sb.toString());
         |""".stripMargin
    })
  }

  override def dataType: DataType = StringType

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
}
/**
 * Masks the first N chars of the input string. N defaults to 4. Additional parameters can be set
 * to change the masking chars for uppercase letters, lowercase letters and digits.
 *
 * @param child expression producing the string to mask
 * @param n number of leading code points to mask; negative values are treated as 0
 * @param upper replacement for uppercase letters; null or empty selects the default, and only
 *              the first code point is used
 * @param lower replacement for lowercase letters, with the same rules as `upper`
 * @param digit replacement for digits, with the same rules as `upper`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(str[, n[, upper[, lower[, digit]]]]) - Masks the first n values of str. By default, n is 4, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the third argument controls the mask character for upper case letters, the fourth argument for lower case letters and the fifth argument for numbers.",
  examples = """
Examples:
> SELECT _FUNC_("1234-5678-8765-4321", 4);
nnnn-5678-8765-4321
""", since = "2.4.0")
// scalastyle:on line.size.limit
case class MaskFirstN(
    child: Expression,
    n: Int,
    upper: String,
    lower: String,
    digit: String)
  extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN {

  // The constructors below are the SQL entry points: arguments arrive as literal expressions
  // and are converted by `extractCharCount` / `extractReplacement`.
  def this(child: Expression) =
    this(child, defaultCharCount, null, null, null)

  def this(child: Expression, n: Expression) =
    this(child, extractCharCount(n), null, null, null)

  def this(child: Expression, n: Expression, upper: Expression) =
    this(child, extractCharCount(n), extractReplacement(upper), null, null)

  def this(child: Expression, n: Expression, upper: Expression, lower: Expression) =
    this(child, extractCharCount(n), extractReplacement(upper), extractReplacement(lower), null)

  def this(
      child: Expression,
      n: Expression,
      upper: Expression,
      lower: Expression,
      digit: Expression) =
    this(child,
      extractCharCount(n),
      extractReplacement(upper),
      extractReplacement(lower),
      extractReplacement(digit))

  /** Masks the leading `charCount` code points and copies the remainder unchanged. */
  override def nullSafeEval(input: Any): Any = {
    val str = input.asInstanceOf[UTF8String].toString
    val length = str.codePointCount(0, str.length())
    // Never mask past the end of the string.
    val endOfMask = math.min(charCount, length)
    val sb = new java.lang.StringBuilder(length)
    val offset = appendMaskedToStringBuilder(sb, str, 0, endOfMask)
    appendUnchangedToStringBuilder(sb, str, offset, length - endOfMask)
    UTF8String.fromString(sb.toString)
  }

  /** Generates Java code equivalent to `nullSafeEval`. */
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(ctx, ev, (input: String) => {
      val sb = ctx.freshName("sb")
      val length = ctx.freshName("length")
      val offset = ctx.freshName("offset")
      val inputString = ctx.freshName("inputString")
      val endOfMask = ctx.freshName("endOfMask")
      s"""
         |String $inputString = $input.toString();
         |${inputStringLengthCode(inputString, length)}
         |${CodeGenerator.JAVA_INT} $endOfMask = $charCount > $length ? $length : $charCount;
         |${CodeGenerator.JAVA_INT} $offset = 0;
         |StringBuilder $sb = new StringBuilder($length);
         |${appendMaskedToStringBuilderCode(ctx, sb, inputString, offset, endOfMask)}
         |${appendUnchangedToStringBuilderCode(
              ctx, sb, inputString, offset, s"$length - $endOfMask")}
         |${ev.value} = UTF8String.fromString($sb.toString());
         |""".stripMargin
    })
  }

  override def dataType: DataType = StringType

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)

  override def prettyName: String = "mask_first_n"
}
/**
 * Masks the last N chars of the input string. N defaults to 4. Additional parameters can be set
 * to change the masking chars for uppercase letters, lowercase letters and digits.
 *
 * @param child expression producing the string to mask
 * @param n number of trailing code points to mask; negative values are treated as 0
 * @param upper replacement for uppercase letters; null or empty selects the default, and only
 *              the first code point is used
 * @param lower replacement for lowercase letters, with the same rules as `upper`
 * @param digit replacement for digits, with the same rules as `upper`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(str[, n[, upper[, lower[, digit]]]]) - Masks the last n values of str. By default, n is 4, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the third argument controls the mask character for upper case letters, the fourth argument for lower case letters and the fifth argument for numbers.",
  examples = """
Examples:
> SELECT _FUNC_("1234-5678-8765-4321", 4);
1234-5678-8765-nnnn
""", since = "2.4.0")
// scalastyle:on line.size.limit
case class MaskLastN(
    child: Expression,
    n: Int,
    upper: String,
    lower: String,
    digit: String)
  extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN {

  // The constructors below are the SQL entry points: arguments arrive as literal expressions
  // and are converted by `extractCharCount` / `extractReplacement`.
  def this(child: Expression) =
    this(child, defaultCharCount, null, null, null)

  def this(child: Expression, n: Expression) =
    this(child, extractCharCount(n), null, null, null)

  def this(child: Expression, n: Expression, upper: Expression) =
    this(child, extractCharCount(n), extractReplacement(upper), null, null)

  def this(child: Expression, n: Expression, upper: Expression, lower: Expression) =
    this(child, extractCharCount(n), extractReplacement(upper), extractReplacement(lower), null)

  def this(
      child: Expression,
      n: Expression,
      upper: Expression,
      lower: Expression,
      digit: Expression) =
    this(child,
      extractCharCount(n),
      extractReplacement(upper),
      extractReplacement(lower),
      extractReplacement(digit))

  /** Copies the leading code points unchanged and masks the trailing `charCount` ones. */
  override def nullSafeEval(input: Any): Any = {
    val str = input.asInstanceOf[UTF8String].toString
    val length = str.codePointCount(0, str.length())
    // When the count covers the whole string, masking starts at the first code point.
    val startOfMask = math.max(length - charCount, 0)
    val sb = new java.lang.StringBuilder(length)
    val offset = appendUnchangedToStringBuilder(sb, str, 0, startOfMask)
    appendMaskedToStringBuilder(sb, str, offset, length - startOfMask)
    UTF8String.fromString(sb.toString)
  }

  /** Generates Java code equivalent to `nullSafeEval`. */
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(ctx, ev, (input: String) => {
      val sb = ctx.freshName("sb")
      val length = ctx.freshName("length")
      val offset = ctx.freshName("offset")
      val inputString = ctx.freshName("inputString")
      val startOfMask = ctx.freshName("startOfMask")
      s"""
         |String $inputString = $input.toString();
         |${inputStringLengthCode(inputString, length)}
         |${CodeGenerator.JAVA_INT} $startOfMask = $charCount >= $length ?
         | 0 : $length - $charCount;
         |${CodeGenerator.JAVA_INT} $offset = 0;
         |StringBuilder $sb = new StringBuilder($length);
         |${appendUnchangedToStringBuilderCode(ctx, sb, inputString, offset, startOfMask)}
         |${appendMaskedToStringBuilderCode(
              ctx, sb, inputString, offset, s"$length - $startOfMask")}
         |${ev.value} = UTF8String.fromString($sb.toString());
         |""".stripMargin
    })
  }

  override def dataType: DataType = StringType

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)

  override def prettyName: String = "mask_last_n"
}
/**
 * Masks all but the first N chars of the input string. N defaults to 4. Additional parameters can
 * be set to change the masking chars for uppercase letters, lowercase letters and digits.
 *
 * @param child expression producing the string to mask
 * @param n number of leading code points to leave visible; negative values are treated as 0
 * @param upper replacement for uppercase letters; null or empty selects the default, and only
 *              the first code point is used
 * @param lower replacement for lowercase letters, with the same rules as `upper`
 * @param digit replacement for digits, with the same rules as `upper`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(str[, n[, upper[, lower[, digit]]]]) - Masks all but the first n values of str. By default, n is 4, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the third argument controls the mask character for upper case letters, the fourth argument for lower case letters and the fifth argument for numbers.",
  examples = """
Examples:
> SELECT _FUNC_("1234-5678-8765-4321", 4);
1234-nnnn-nnnn-nnnn
""", since = "2.4.0")
// scalastyle:on line.size.limit
case class MaskShowFirstN(
    child: Expression,
    n: Int,
    upper: String,
    lower: String,
    digit: String)
  extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN {

  // The constructors below are the SQL entry points: arguments arrive as literal expressions
  // and are converted by `extractCharCount` / `extractReplacement`.
  def this(child: Expression) =
    this(child, defaultCharCount, null, null, null)

  def this(child: Expression, n: Expression) =
    this(child, extractCharCount(n), null, null, null)

  def this(child: Expression, n: Expression, upper: Expression) =
    this(child, extractCharCount(n), extractReplacement(upper), null, null)

  def this(child: Expression, n: Expression, upper: Expression, lower: Expression) =
    this(child, extractCharCount(n), extractReplacement(upper), extractReplacement(lower), null)

  def this(
      child: Expression,
      n: Expression,
      upper: Expression,
      lower: Expression,
      digit: Expression) =
    this(child,
      extractCharCount(n),
      extractReplacement(upper),
      extractReplacement(lower),
      extractReplacement(digit))

  /** Copies the leading `charCount` code points unchanged and masks the remainder. */
  override def nullSafeEval(input: Any): Any = {
    val str = input.asInstanceOf[UTF8String].toString
    val length = str.codePointCount(0, str.length())
    // Never show past the end of the string.
    val startOfMask = math.min(charCount, length)
    val sb = new java.lang.StringBuilder(length)
    val offset = appendUnchangedToStringBuilder(sb, str, 0, startOfMask)
    appendMaskedToStringBuilder(sb, str, offset, length - startOfMask)
    UTF8String.fromString(sb.toString)
  }

  /** Generates Java code equivalent to `nullSafeEval`. */
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(ctx, ev, (input: String) => {
      val sb = ctx.freshName("sb")
      val length = ctx.freshName("length")
      val offset = ctx.freshName("offset")
      val inputString = ctx.freshName("inputString")
      val startOfMask = ctx.freshName("startOfMask")
      s"""
         |String $inputString = $input.toString();
         |${inputStringLengthCode(inputString, length)}
         |${CodeGenerator.JAVA_INT} $startOfMask = $charCount > $length ? $length : $charCount;
         |${CodeGenerator.JAVA_INT} $offset = 0;
         |StringBuilder $sb = new StringBuilder($length);
         |${appendUnchangedToStringBuilderCode(ctx, sb, inputString, offset, startOfMask)}
         |${appendMaskedToStringBuilderCode(
              ctx, sb, inputString, offset, s"$length - $startOfMask")}
         |${ev.value} = UTF8String.fromString($sb.toString());
         |""".stripMargin
    })
  }

  override def dataType: DataType = StringType

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)

  override def prettyName: String = "mask_show_first_n"
}
/**
 * Masks all but the last N chars of the input string. N defaults to 4. Additional parameters can
 * be set to change the masking chars for uppercase letters, lowercase letters and digits.
 *
 * @param child expression producing the string to mask
 * @param n number of trailing code points to leave visible; negative values are treated as 0
 * @param upper replacement for uppercase letters; null or empty selects the default, and only
 *              the first code point is used
 * @param lower replacement for lowercase letters, with the same rules as `upper`
 * @param digit replacement for digits, with the same rules as `upper`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(str[, n[, upper[, lower[, digit]]]]) - Masks all but the last n values of str. By default, n is 4, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the third argument controls the mask character for upper case letters, the fourth argument for lower case letters and the fifth argument for numbers.",
  examples = """
Examples:
> SELECT _FUNC_("1234-5678-8765-4321", 4);
nnnn-nnnn-nnnn-4321
""", since = "2.4.0")
// scalastyle:on line.size.limit
case class MaskShowLastN(
    child: Expression,
    n: Int,
    upper: String,
    lower: String,
    digit: String)
  extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN {

  // The constructors below are the SQL entry points: arguments arrive as literal expressions
  // and are converted by `extractCharCount` / `extractReplacement`.
  def this(child: Expression) =
    this(child, defaultCharCount, null, null, null)

  def this(child: Expression, n: Expression) =
    this(child, extractCharCount(n), null, null, null)

  def this(child: Expression, n: Expression, upper: Expression) =
    this(child, extractCharCount(n), extractReplacement(upper), null, null)

  def this(child: Expression, n: Expression, upper: Expression, lower: Expression) =
    this(child, extractCharCount(n), extractReplacement(upper), extractReplacement(lower), null)

  def this(
      child: Expression,
      n: Expression,
      upper: Expression,
      lower: Expression,
      digit: Expression) =
    this(child,
      extractCharCount(n),
      extractReplacement(upper),
      extractReplacement(lower),
      extractReplacement(digit))

  /** Masks the leading code points and copies the trailing `charCount` ones unchanged. */
  override def nullSafeEval(input: Any): Any = {
    val str = input.asInstanceOf[UTF8String].toString
    val length = str.codePointCount(0, str.length())
    // When the count covers the whole string, nothing is masked.
    val endOfMask = math.max(length - charCount, 0)
    val sb = new java.lang.StringBuilder(length)
    val offset = appendMaskedToStringBuilder(sb, str, 0, endOfMask)
    appendUnchangedToStringBuilder(sb, str, offset, length - endOfMask)
    UTF8String.fromString(sb.toString)
  }

  /** Generates Java code equivalent to `nullSafeEval`. */
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(ctx, ev, (input: String) => {
      val sb = ctx.freshName("sb")
      val length = ctx.freshName("length")
      val offset = ctx.freshName("offset")
      val inputString = ctx.freshName("inputString")
      val endOfMask = ctx.freshName("endOfMask")
      s"""
         |String $inputString = $input.toString();
         |${inputStringLengthCode(inputString, length)}
         |${CodeGenerator.JAVA_INT} $endOfMask = $charCount >= $length ? 0 : $length - $charCount;
         |${CodeGenerator.JAVA_INT} $offset = 0;
         |StringBuilder $sb = new StringBuilder($length);
         |${appendMaskedToStringBuilderCode(ctx, sb, inputString, offset, endOfMask)}
         |${appendUnchangedToStringBuilderCode(
              ctx, sb, inputString, offset, s"$length - $endOfMask")}
         |${ev.value} = UTF8String.fromString($sb.toString());
         |""".stripMargin
    })
  }

  override def dataType: DataType = StringType

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)

  override def prettyName: String = "mask_show_last_n"
}
/**
 * Returns a hashed value based on str.
 *
 * The result is whatever commons-codec's `DigestUtils.md5Hex` returns for the input string.
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(str) - Returns a hashed value based on str. The hash is consistent and can be used to join masked values together across tables.",
  examples = """
Examples:
> SELECT _FUNC_("abcd-EFGH-8765-4321");
60c713f5ec6912229d2060df1c322776
""", since = "2.4.0")
// scalastyle:on line.size.limit
case class MaskHash(child: Expression)
  extends UnaryExpression with ExpectsInputTypes {

  /** Hashes the (non-null) input string. */
  override def nullSafeEval(input: Any): Any = {
    UTF8String.fromString(DigestUtils.md5Hex(input.asInstanceOf[UTF8String].toString))
  }

  /** Generates Java code equivalent to `nullSafeEval`. */
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(ctx, ev, (input: String) => {
      // DigestUtils is a plain Java class, so its name can be spliced into the code as is.
      val digestUtilsClass = classOf[DigestUtils].getName
      s"""
         |${ev.value} = UTF8String.fromString($digestUtilsClass.md5Hex($input.toString()));
         |""".stripMargin
    })
  }

  override def dataType: DataType = StringType

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)

  override def prettyName: String = "mask_hash"
}

View file

@ -0,0 +1,236 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.types.{IntegerType, StringType}
class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
test("mask") {
  // Inputs reused by most of the checks below.
  val card = Literal("abcd-EFGH-8765-4321")
  val nullString = Literal(null, StringType)

  // Explicit replacements, through the primary constructor and the SQL-facing ones.
  checkEvaluation(Mask(card, "U", "l", "#"), "llll-UUUU-####-####")
  checkEvaluation(
    new Mask(card, Literal("U"), Literal("l"), Literal("#")),
    "llll-UUUU-####-####")
  checkEvaluation(new Mask(card, Literal("U"), Literal("l")), "llll-UUUU-nnnn-nnnn")
  checkEvaluation(new Mask(card, Literal("U")), "xxxx-UUUU-nnnn-nnnn")
  checkEvaluation(new Mask(card), "xxxx-XXXX-nnnn-nnnn")

  // Null input and null replacements.
  checkEvaluation(new Mask(nullString), null)
  checkEvaluation(Mask(card, null, "l", "#"), "llll-XXXX-####-####")
  checkEvaluation(new Mask(card, nullString, nullString, nullString), "xxxx-XXXX-nnnn-nnnn")

  // Only the first character of a longer replacement is used.
  checkEvaluation(new Mask(card, Literal("Upper")), "xxxx-UUUU-nnnn-nnnn")

  // Empty input and empty replacements.
  checkEvaluation(new Mask(Literal("")), "")
  checkEvaluation(new Mask(card, Literal("")), "xxxx-XXXX-nnnn-nnnn")
  checkEvaluation(Mask(card, "", "", ""), "xxxx-XXXX-nnnn-nnnn")

  // scalastyle:off nonascii
  checkEvaluation(Mask(Literal("Ul9U"), "\u2200", null, null), "\u2200xn\u2200")
  checkEvaluation(
    new Mask(Literal("Hello World, こんにちは, 𠀋"), Literal("あ"), Literal("𡈽")),
    "あ𡈽𡈽𡈽𡈽 あ𡈽𡈽𡈽𡈽, こんにちは, 𠀋")
  // scalastyle:on nonascii

  // A non-string replacement literal is rejected.
  intercept[AnalysisException] {
    checkEvaluation(new Mask(Literal(""), Literal(1)), "")
  }
}
test("mask_first_n") {
  // Inputs reused by most of the checks below.
  val card = Literal("abcd-EFGH-8765-4321")
  val nullString = Literal(null, StringType)
  val six = Literal(6)

  // Explicit count and replacements, through the primary constructor and the SQL-facing ones.
  checkEvaluation(MaskFirstN(Literal("aB3d-EFGH-8765"), 6, "U", "l", "#"), "lU#l-UFGH-8765")
  checkEvaluation(
    new MaskFirstN(card, six, Literal("U"), Literal("l"), Literal("#")),
    "llll-UFGH-8765-4321")
  checkEvaluation(new MaskFirstN(card, six, Literal("U"), Literal("l")), "llll-UFGH-8765-4321")
  checkEvaluation(new MaskFirstN(card, six, Literal("U")), "xxxx-UFGH-8765-4321")
  checkEvaluation(new MaskFirstN(card, six), "xxxx-XFGH-8765-4321")

  // A non-integer count literal is rejected.
  intercept[AnalysisException] {
    checkEvaluation(new MaskFirstN(card, Literal("U")), "")
  }

  // Defaults, null input and null arguments.
  checkEvaluation(new MaskFirstN(card), "xxxx-EFGH-8765-4321")
  checkEvaluation(new MaskFirstN(nullString), null)
  checkEvaluation(MaskFirstN(card, 4, "U", "l", null), "llll-EFGH-8765-4321")
  checkEvaluation(
    new MaskFirstN(card, Literal(null, IntegerType), nullString, nullString, nullString),
    "xxxx-EFGH-8765-4321")

  // Only the first character of a longer replacement is used.
  checkEvaluation(new MaskFirstN(card, six, Literal("Upper")), "xxxx-UFGH-8765-4321")

  // Empty input, empty replacements and out-of-range counts.
  checkEvaluation(new MaskFirstN(Literal("")), "")
  checkEvaluation(new MaskFirstN(card, Literal(4), Literal("")), "xxxx-EFGH-8765-4321")
  checkEvaluation(MaskFirstN(card, 1000, "", "", ""), "xxxx-XXXX-nnnn-nnnn")
  checkEvaluation(MaskFirstN(card, -1, "", "", ""), "abcd-EFGH-8765-4321")

  // scalastyle:off nonascii
  checkEvaluation(MaskFirstN(Literal("Ul9U"), 2, "\u2200", null, null), "\u2200x9U")
  checkEvaluation(
    new MaskFirstN(Literal("あ, 𠀋, Hello World"), Literal(10)),
    "あ, 𠀋, Xxxxo World")
  // scalastyle:on nonascii
}
test("mask_last_n") {
  // Inputs reused by most of the checks below.
  val shortCard = Literal("abcd-EFGH-8765")
  val card = Literal("abcd-EFGH-8765-4321")
  val nullString = Literal(null, StringType)
  val six = Literal(6)

  // Explicit count and replacements, through the primary constructor and the SQL-facing ones.
  checkEvaluation(MaskLastN(Literal("abcd-EFGH-aB3d"), 6, "U", "l", "#"), "abcd-EFGU-lU#l")
  checkEvaluation(
    new MaskLastN(shortCard, six, Literal("U"), Literal("l"), Literal("#")),
    "abcd-EFGU-####")
  checkEvaluation(new MaskLastN(shortCard, six, Literal("U"), Literal("l")), "abcd-EFGU-nnnn")
  checkEvaluation(new MaskLastN(shortCard, six, Literal("U")), "abcd-EFGU-nnnn")
  checkEvaluation(new MaskLastN(shortCard, six), "abcd-EFGX-nnnn")

  // A non-integer count literal is rejected.
  intercept[AnalysisException] {
    checkEvaluation(new MaskLastN(shortCard, Literal("U")), "")
  }

  // Defaults, null input and null arguments.
  checkEvaluation(new MaskLastN(card), "abcd-EFGH-8765-nnnn")
  checkEvaluation(new MaskLastN(nullString), null)
  checkEvaluation(MaskLastN(card, 4, "U", "l", null), "abcd-EFGH-8765-nnnn")
  checkEvaluation(
    new MaskLastN(card, Literal(null, IntegerType), nullString, nullString, nullString),
    "abcd-EFGH-8765-nnnn")

  // Only the first character of a longer replacement is used.
  checkEvaluation(new MaskLastN(card, Literal(12), Literal("Upper")), "abcd-EFUU-nnnn-nnnn")

  // Empty input, empty replacements and out-of-range counts.
  checkEvaluation(new MaskLastN(Literal("")), "")
  checkEvaluation(new MaskLastN(card, Literal(16), Literal("")), "abcx-XXXX-nnnn-nnnn")
  checkEvaluation(MaskLastN(card, 1000, "", "", ""), "xxxx-XXXX-nnnn-nnnn")
  checkEvaluation(MaskLastN(card, -1, "", "", ""), "abcd-EFGH-8765-4321")

  // scalastyle:off nonascii
  checkEvaluation(MaskLastN(Literal("Ul9U"), 2, "\u2200", null, null), "Uln\u2200")
  checkEvaluation(
    new MaskLastN(Literal("あ, 𠀋, Hello World あ 𠀋"), Literal(10)),
    "あ, 𠀋, Hello Xxxxx あ 𠀋")
  // scalastyle:on nonascii
}
test("mask_show_first_n") {
  // Inputs reused by most of the checks below.
  val card = Literal("abcd-EFGH-8765-4321")
  val nullString = Literal(null, StringType)
  val six = Literal(6)

  // Explicit count and replacements, through the primary constructor and the SQL-facing ones.
  checkEvaluation(
    MaskShowFirstN(Literal("abcd-EFGH-8765-aB3d"), 6, "U", "l", "#"),
    "abcd-EUUU-####-lU#l")
  checkEvaluation(
    new MaskShowFirstN(card, six, Literal("U"), Literal("l"), Literal("#")),
    "abcd-EUUU-####-####")
  checkEvaluation(
    new MaskShowFirstN(card, six, Literal("U"), Literal("l")),
    "abcd-EUUU-nnnn-nnnn")
  checkEvaluation(new MaskShowFirstN(card, six, Literal("U")), "abcd-EUUU-nnnn-nnnn")
  checkEvaluation(new MaskShowFirstN(card, six), "abcd-EXXX-nnnn-nnnn")

  // A non-integer count literal is rejected.
  intercept[AnalysisException] {
    checkEvaluation(new MaskShowFirstN(card, Literal("U")), "")
  }

  // Defaults, null input and null arguments.
  checkEvaluation(new MaskShowFirstN(card), "abcd-XXXX-nnnn-nnnn")
  checkEvaluation(new MaskShowFirstN(nullString), null)
  checkEvaluation(MaskShowFirstN(card, 4, "U", "l", null), "abcd-UUUU-nnnn-nnnn")
  checkEvaluation(
    new MaskShowFirstN(card, Literal(null, IntegerType), nullString, nullString, nullString),
    "abcd-XXXX-nnnn-nnnn")

  // Only the first character of a longer replacement is used.
  checkEvaluation(new MaskShowFirstN(card, six, Literal("Upper")), "abcd-EUUU-nnnn-nnnn")

  // Empty input, empty replacements and out-of-range counts.
  checkEvaluation(new MaskShowFirstN(Literal("")), "")
  checkEvaluation(new MaskShowFirstN(card, Literal(4), Literal("")), "abcd-XXXX-nnnn-nnnn")
  checkEvaluation(MaskShowFirstN(card, 1000, "", "", ""), "abcd-EFGH-8765-4321")
  checkEvaluation(MaskShowFirstN(card, -1, "", "", ""), "xxxx-XXXX-nnnn-nnnn")

  // scalastyle:off nonascii
  checkEvaluation(MaskShowFirstN(Literal("Ul9U"), 2, "\u2200", null, null), "Uln\u2200")
  checkEvaluation(
    new MaskShowFirstN(Literal("あ, 𠀋, Hello World"), Literal(10)),
    "あ, 𠀋, Hellx Xxxxx")
  // scalastyle:on nonascii
}
test("mask_show_last_n") {
  // Input shared by most cases; a new Literal is built from it for every expression.
  val card = "abcd-EFGH-8765-4321"
  def nullString: Literal = Literal(null, StringType)

  // Explicit replacement characters: the last 6 characters stay visible, the rest is masked.
  checkEvaluation(
    MaskShowLastN(Literal("aB3d-EFGH-8765"), 6, "U", "l", "#"),
    "lU#l-UUUH-8765")
  checkEvaluation(
    new MaskShowLastN(Literal(card), Literal(6), Literal("U"), Literal("l"), Literal("#")),
    "llll-UUUU-###5-4321")

  // Dropping trailing arguments one at a time falls back to 'n', 'x' and 'X' respectively.
  checkEvaluation(
    new MaskShowLastN(Literal(card), Literal(6), Literal("U"), Literal("l")),
    "llll-UUUU-nnn5-4321")
  checkEvaluation(
    new MaskShowLastN(Literal(card), Literal(6), Literal("U")),
    "xxxx-UUUU-nnn5-4321")
  checkEvaluation(
    new MaskShowLastN(Literal(card), Literal(6)),
    "xxxx-XXXX-nnn5-4321")

  // A string in the position of `n` is rejected.
  intercept[AnalysisException] {
    checkEvaluation(new MaskShowLastN(Literal(card), Literal("U")), "")
  }

  // With no `n` at all, 4 characters stay visible; a null input evaluates to null.
  checkEvaluation(new MaskShowLastN(Literal(card)), "xxxx-XXXX-nnnn-4321")
  checkEvaluation(new MaskShowLastN(nullString), null)

  // Null arguments behave like omitted ones.
  checkEvaluation(
    MaskShowLastN(Literal(card), 4, "U", "l", null),
    "llll-UUUU-nnnn-4321")
  checkEvaluation(
    new MaskShowLastN(
      Literal(card),
      Literal(null, IntegerType),
      nullString,
      nullString,
      nullString),
    "xxxx-XXXX-nnnn-4321")

  // Only the first character of a longer replacement string is used.
  checkEvaluation(
    new MaskShowLastN(Literal(card), Literal(6), Literal("Upper")),
    "xxxx-UUUU-nnn5-4321")

  // Empty input stays empty; empty replacement strings fall back to the defaults.
  checkEvaluation(new MaskShowLastN(Literal("")), "")
  checkEvaluation(
    new MaskShowLastN(Literal(card), Literal(4), Literal("")),
    "xxxx-XXXX-nnnn-4321")

  // `n` beyond the input length shows everything; a negative `n` shows nothing.
  checkEvaluation(MaskShowLastN(Literal(card), 1000, "", "", ""), card)
  checkEvaluation(MaskShowLastN(Literal(card), -1, "", "", ""), "xxxx-XXXX-nnnn-nnnn")

  // Non-ASCII replacement characters and inputs containing supplementary characters.
  // scalastyle:off nonascii
  checkEvaluation(
    MaskShowLastN(Literal("Ul9U"), 2, "\u2200", null, null),
    "\u2200x9U")
  checkEvaluation(
    new MaskShowLastN(Literal("あ, 𠀋, Hello World"), Literal(10)),
    "あ, 𠀋, Xello World")
  // scalastyle:on nonascii
}
test("mask_hash") {
  // Non-null inputs, the empty string included, evaluate to 32-character hexadecimal digests.
  val cardDigest = "60c713f5ec6912229d2060df1c322776"
  val emptyDigest = "d41d8cd98f00b204e9800998ecf8427e"
  checkEvaluation(MaskHash(Literal("abcd-EFGH-8765-4321")), cardDigest)
  checkEvaluation(MaskHash(Literal("")), emptyDigest)

  // A null input is propagated instead of being hashed.
  checkEvaluation(MaskHash(Literal(null, StringType)), null)

  // Input containing a non-ASCII character.
  // scalastyle:off nonascii
  val nonAsciiInput = "\u2200x9U"
  checkEvaluation(MaskHash(Literal(nonAsciiInput)), "f1243ef123d516b1f32a3a75309e5711")
  // scalastyle:on nonascii
}
}

View file

@ -3499,6 +3499,125 @@ object functions {
*/ */
def map_entries(e: Column): Column = withExpr { MapEntries(e.expr) } def map_entries(e: Column): Column = withExpr { MapEntries(e.expr) }
//////////////////////////////////////////////////////////////////////////////////////////////
// Mask functions
//////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Masks the letters and digits of a string column using the default replacement characters:
 * upper-case letters become `X`, lower-case letters become `x` and digits become `n`.
 * Characters that are neither (e.g. `-`) are kept, and a null input yields null.
 *
 * @param e the string column to mask
 * @return a column holding the masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask(e: Column): Column = withExpr(new Mask(e.expr))
/**
 * Masks the letters and digits of a string column with caller-supplied replacement characters.
 *
 * @param e the string column to mask
 * @param upper replacement for upper-case letters
 * @param lower replacement for lower-case letters
 * @param digit replacement for digits
 * @return a column holding the masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask(e: Column, upper: String, lower: String, digit: String): Column =
  withExpr(Mask(e.expr, upper, lower, digit))
/**
 * Masks only the first `n` characters of a string column, using the default replacement
 * characters (`X` for upper-case letters, `x` for lower-case letters, `n` for digits).
 * The remainder of the string is returned untouched.
 *
 * @param e the string column to mask
 * @param n how many leading characters to mask
 * @return a column holding the partially masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_first_n(e: Column, n: Int): Column =
  withExpr(new MaskFirstN(e.expr, Literal(n)))
/**
 * Masks only the first `n` characters of a string column with caller-supplied replacement
 * characters. The remainder of the string is returned untouched.
 *
 * @param e the string column to mask
 * @param n how many leading characters to mask
 * @param upper replacement for upper-case letters
 * @param lower replacement for lower-case letters
 * @param digit replacement for digits
 * @return a column holding the partially masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_first_n(e: Column, n: Int, upper: String, lower: String, digit: String): Column =
  withExpr(MaskFirstN(e.expr, n, upper, lower, digit))
/**
 * Masks only the last `n` characters of a string column, using the default replacement
 * characters (`X` for upper-case letters, `x` for lower-case letters, `n` for digits).
 * The beginning of the string is returned untouched.
 *
 * @param e the string column to mask
 * @param n how many trailing characters to mask
 * @return a column holding the partially masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_last_n(e: Column, n: Int): Column =
  withExpr(new MaskLastN(e.expr, Literal(n)))
/**
 * Masks only the last `n` characters of a string column with caller-supplied replacement
 * characters. The beginning of the string is returned untouched.
 *
 * @param e the string column to mask
 * @param n how many trailing characters to mask
 * @param upper replacement for upper-case letters
 * @param lower replacement for lower-case letters
 * @param digit replacement for digits
 * @return a column holding the partially masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_last_n(e: Column, n: Int, upper: String, lower: String, digit: String): Column =
  withExpr(MaskLastN(e.expr, n, upper, lower, digit))
/**
 * Keeps the first `n` characters of a string column visible and masks everything after them,
 * using the default replacement characters (`X` for upper-case letters, `x` for lower-case
 * letters, `n` for digits).
 *
 * @param e the string column to mask
 * @param n how many leading characters to leave unmasked
 * @return a column holding the partially masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_show_first_n(e: Column, n: Int): Column =
  withExpr(new MaskShowFirstN(e.expr, Literal(n)))
/**
 * Keeps the first `n` characters of a string column visible and masks everything after them
 * with caller-supplied replacement characters.
 *
 * @param e the string column to mask
 * @param n how many leading characters to leave unmasked
 * @param upper replacement for upper-case letters
 * @param lower replacement for lower-case letters
 * @param digit replacement for digits
 * @return a column holding the partially masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_show_first_n(e: Column, n: Int, upper: String, lower: String, digit: String): Column =
  withExpr(MaskShowFirstN(e.expr, n, upper, lower, digit))
/**
 * Keeps the last `n` characters of a string column visible and masks everything before them,
 * using the default replacement characters (`X` for upper-case letters, `x` for lower-case
 * letters, `n` for digits).
 *
 * @param e the string column to mask
 * @param n how many trailing characters to leave unmasked
 * @return a column holding the partially masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_show_last_n(e: Column, n: Int): Column =
  withExpr(new MaskShowLastN(e.expr, Literal(n)))
/**
 * Keeps the last `n` characters of a string column visible and masks everything before them
 * with caller-supplied replacement characters.
 *
 * @param e the string column to mask
 * @param n how many trailing characters to leave unmasked
 * @param upper replacement for upper-case letters
 * @param lower replacement for lower-case letters
 * @param digit replacement for digits
 * @return a column holding the partially masked strings
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_show_last_n(e: Column, n: Int, upper: String, lower: String, digit: String): Column =
  withExpr(MaskShowLastN(e.expr, n, upper, lower, digit))
/**
 * Replaces each value of the input column with a hash of it, rendered as a hexadecimal
 * string. A null input yields null.
 *
 * @param e the column to hash
 * @return a column holding the hashed values
 * @group mask_funcs
 * @since 2.4.0
 */
def mask_hash(e: Column): Column = withExpr(MaskHash(e.expr))
// scalastyle:off line.size.limit
// scalastyle:off parameter.number

View file

@ -276,6 +276,113 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
) )
} }
test("mask functions") {
  val df = Seq("TestString-123", "", null).toDF("a")

  // Expected single-column answer: the masked first value, then the empty and null rows,
  // which every masking function returns as they are.
  def expectRows(maskedValue: String): Seq[Row] = Seq(Row(maskedValue), Row(""), Row(null))

  // Runs the SQL checks shared by the four `*_n` functions: all argument counts over column
  // `a`, a null input, null optional arguments, and a non-literal `n`, which must be rejected.
  def checkSqlForms(fn: String, overTable: Row, overLiteral: String): Unit = {
    checkAnswer(
      df.selectExpr(
        s"$fn(a)",
        s"$fn(a, 6)",
        s"$fn(a, 6, 'U')",
        s"$fn(a, 6, 'U', 'l')",
        s"$fn(a, 6, 'U', 'l', '#')"),
      Seq(overTable, Row("", "", "", "", ""), Row(null, null, null, null, null)))
    checkAnswer(sql(s"select $fn(null)"), Row(null))
    checkAnswer(sql(s"select $fn('A1aA1a', null, null, null, null)"), Row(overLiteral))
    intercept[AnalysisException] {
      checkAnswer(spark.range(1).selectExpr(s"$fn('A1aA1a', id)"), Row(overLiteral))
    }
  }

  // Scala API with the default replacement characters.
  checkAnswer(df.select(mask($"a")), expectRows("XxxxXxxxxx-nnn"))
  checkAnswer(df.select(mask_first_n($"a", 4)), expectRows("XxxxString-123"))
  checkAnswer(df.select(mask_last_n($"a", 4)), expectRows("TestString-nnn"))
  checkAnswer(df.select(mask_show_first_n($"a", 4)), expectRows("TestXxxxxx-nnn"))
  checkAnswer(df.select(mask_show_last_n($"a", 4)), expectRows("XxxxXxxxxx-123"))
  checkAnswer(
    df.select(mask_hash($"a")),
    Seq(
      Row("dd78d68ad1b23bde126812482dd70ac6"),
      Row("d41d8cd98f00b204e9800998ecf8427e"),
      Row(null)))

  // Scala API with explicit replacement characters.
  checkAnswer(df.select(mask($"a", "U", "l", "#")), expectRows("UlllUlllll-###"))
  checkAnswer(df.select(mask_first_n($"a", 4, "U", "l", "#")), expectRows("UlllString-123"))
  checkAnswer(df.select(mask_last_n($"a", 4, "U", "l", "#")), expectRows("TestString-###"))
  checkAnswer(
    df.select(mask_show_first_n($"a", 4, "U", "l", "#")),
    expectRows("TestUlllll-###"))
  checkAnswer(
    df.select(mask_show_last_n($"a", 4, "U", "l", "#")),
    expectRows("UlllUlllll-123"))

  // SQL `mask`: optional arguments, null handling, and rejection of a column as replacement.
  checkAnswer(
    df.selectExpr("mask(a)", "mask(a, 'U')", "mask(a, 'U', 'l')", "mask(a, 'U', 'l', '#')"),
    Seq(
      Row("XxxxXxxxxx-nnn", "UxxxUxxxxx-nnn", "UlllUlllll-nnn", "UlllUlllll-###"),
      Row("", "", "", ""),
      Row(null, null, null, null)))
  checkAnswer(sql("select mask(null)"), Row(null))
  checkAnswer(sql("select mask('AAaa11', null, null, null)"), Row("XXxxnn"))
  intercept[AnalysisException] {
    checkAnswer(df.selectExpr("mask(a, a)"), expectRows("XxxxXxxxxx-nnn"))
  }

  // SQL forms of the four functions that take a character count.
  checkSqlForms(
    "mask_first_n",
    Row("XxxxString-123", "XxxxXxring-123", "UxxxUxring-123", "UlllUlring-123",
      "UlllUlring-123"),
    "XnxX1a")
  checkSqlForms(
    "mask_last_n",
    Row("TestString-nnn", "TestStrixx-nnn", "TestStrixx-nnn", "TestStrill-nnn",
      "TestStrill-###"),
    "A1xXnx")
  checkSqlForms(
    "mask_show_first_n",
    Row("TestXxxxxx-nnn", "TestStxxxx-nnn", "TestStxxxx-nnn", "TestStllll-nnn",
      "TestStllll-###"),
    "A1aAnx")
  checkSqlForms(
    "mask_show_last_n",
    Row("XxxxXxxxxx-123", "XxxxXxxxng-123", "UxxxUxxxng-123", "UlllUlllng-123",
      "UlllUlllng-123"),
    "XnaA1a")

  checkAnswer(sql("select mask_hash(null)"), Row(null))
}
test("sort_array/array_sort functions") { test("sort_array/array_sort functions") {
val df = Seq( val df = Seq(
(Array[Int](2, 1, 3), Array("b", "c", "a")), (Array[Int](2, 1, 3), Array("b", "c", "a")),