[SPARK-27947][SQL] Enhance redactOptions to accept any Map type

## What changes were proposed in this pull request?

Handle the case where a ParsedStatement subclass has a Map field whose type is not Map[String, String].

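In ParsedStatement.productIterator, `case mapArg: Map[_, _]` matches a Map of any key and value types because the type arguments are erased at runtime. The unchecked `asInstanceOf[Map[String, String]]` cast therefore succeeds, and a ClassCastException is thrown later, when a non-String key or value is used as a String during redaction.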

The following test reproduces the issue:
```
case class TestStatement(p: Map[String, Int]) extends ParsedStatement {
 override def output: Seq[Attribute] = Nil
 override def children: Seq[LogicalPlan] = Nil
}

TestStatement(Map("abc" -> 1)).toString
```
Changing the pattern to `case mapArg: Map[String, String]` does not help, because the type arguments are still erased at runtime. In fact, the compiler emits this warning:
```
Warning:(41, 18) non-variable type argument String in type pattern
 scala.collection.immutable.Map[String,String] (the underlying of Map[String,String])
 is unchecked since it is eliminated by erasure
case mapArg: Map[String, String] =>
```

## How was this patch tested?

Add 2 unit tests.

Closes #24800 from jzhuge/SPARK-27947.

Authored-by: John Zhuge <jzhuge@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
John Zhuge 2019-06-10 11:58:37 -07:00 committed by Dongjoon Hyun
parent 3b37bfde2a
commit dbba3a33bc
4 changed files with 30 additions and 10 deletions

View file

@ -2596,7 +2596,7 @@ private[spark] object Utils extends Logging {
* Redact the sensitive values in the given map. If a map key matches the redaction pattern then
* its value is replaced with a dummy text.
*/
def redact(regex: Option[Regex], kvs: Seq[(String, String)]): Seq[(String, String)] = {
def redact[K, V](regex: Option[Regex], kvs: Seq[(K, V)]): Seq[(K, V)] = {
regex match {
case None => kvs
case Some(r) => redact(r, kvs)
@ -2618,7 +2618,7 @@ private[spark] object Utils extends Logging {
}
}
private def redact(redactionPattern: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] = {
private def redact[K, V](redactionPattern: Regex, kvs: Seq[(K, V)]): Seq[(K, V)] = {
// If the sensitive information regex matches with either the key or the value, redact the value
// While the original intent was to only redact the value if the key matched with the regex,
// we've found that especially in verbose mode, the value of the property may contain sensitive
@ -2632,12 +2632,19 @@ private[spark] object Utils extends Logging {
// arbitrary property contained the term 'password', we may redact the value from the UI and
// logs. In order to work around it, user would have to make the spark.redaction.regex property
// more specific.
kvs.map { case (key, value) =>
redactionPattern.findFirstIn(key)
.orElse(redactionPattern.findFirstIn(value))
.map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
.getOrElse((key, value))
}
kvs.map {
case (key: String, value: String) =>
redactionPattern.findFirstIn(key)
.orElse(redactionPattern.findFirstIn(value))
.map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
.getOrElse((key, value))
case (key, value: String) =>
redactionPattern.findFirstIn(value)
.map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
.getOrElse((key, value))
case (key, value) =>
(key, value)
}.asInstanceOf[Seq[(K, V)]]
}
/**

View file

@ -1120,6 +1120,19 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
assert(redactedCmdArgMap("spark.sensitive.property") === Utils.REDACTION_REPLACEMENT_TEXT)
}
test("redact sensitive information in sequence of key value pairs") {
  // Pattern under test and the text a redacted value is expected to be replaced with.
  val pattern = Some("my.password".r)
  val redactedText = Utils.REDACTION_REPLACEMENT_TEXT

  // A String key matching the pattern causes its String value to be replaced.
  val matchedByKey = Utils.redact(pattern, Seq(("spark.my.password", "12345")))
  assert(matchedByKey === Seq(("spark.my.password", redactedText)))

  // A String value matching the pattern is replaced even when the key does not match.
  val matchedByValue = Utils.redact(pattern, Seq(("anything", "spark.my.password=12345")))
  assert(matchedByValue === Seq(("anything", redactedText)))

  // A non-String key is accepted; the matching String value is still replaced.
  val nonStringKey = Utils.redact(pattern, Seq((999, "spark.my.password=12345")))
  assert(nonStringKey === Seq((999, redactedText)))

  // Do not redact when value type is not string
  val nonStringValue = Utils.redact(pattern, Seq(("my.password", 12345)))
  assert(nonStringValue === Seq(("my.password", 12345)))
}
test("tryWithSafeFinally") {
var e = new Error("Block0")
val finallyBlockError = new Error("Finally Block")

View file

@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
private[sql] abstract class ParsedStatement extends LogicalPlan {
// Redact properties and options when parsed nodes are used by generic methods like toString
override def productIterator: Iterator[Any] = super.productIterator.map {
// A Map[_, _] type pattern matches a Map with any key and value types, because the type
// arguments are erased at runtime.
// NOTE(review): this listing looks like a diff view, so the two Map cases below are presumably
// the removed line followed by its replacement rather than two live cases -- confirm.
case mapArg: Map[_, _] => conf.redactOptions(mapArg.asInstanceOf[Map[String, String]])
case mapArg: Map[_, _] => conf.redactOptions(mapArg)
// Any element that is not a Map is returned unchanged.
case other => other
}

View file

@ -2353,7 +2353,7 @@ class SQLConf extends Serializable with Logging {
/**
* Redacts the given option map according to the description of SQL_OPTIONS_REDACTION_PATTERN.
*/
def redactOptions(options: Map[String, String]): Map[String, String] = {
def redactOptions[K, V](options: Map[K, V]): Map[K, V] = {
val regexes = Seq(
getConf(SQL_OPTIONS_REDACTION_PATTERN),
SECRET_REDACTION_PATTERN.readFrom(reader))