[SPARK-33881][SQL][TESTS] Check null and empty string as partition values in DS v1 and v2 tests

### What changes were proposed in this pull request?
Add tests that check the handling of `null` and `''` (the empty string) as partition values in the commands `SHOW PARTITIONS`, `ALTER TABLE .. ADD PARTITION`, and `ALTER TABLE .. DROP PARTITION`.
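
For context, a minimal sketch of the DS v1 behavior these tests pin down (not part of the patch; it assumes `spark` is an active `SparkSession`, e.g. in `spark-shell`, and the table name is illustrative):
```scala
// Write both null and '' as partition values into a parquet table,
// then observe how SHOW PARTITIONS and a read-back report them.
import spark.implicits._

Seq((0, ""), (1, null)).toDF("a", "part")
  .write
  .partitionBy("part")
  .format("parquet")
  .mode("overwrite")
  .saveAsTable("tbl")

// Both values collapse into the Hive default partition:
spark.sql("SHOW PARTITIONS tbl").show(truncate = false)
// part=__HIVE_DEFAULT_PARTITION__

// And both read back as null:
spark.table("tbl").show()
// |  0|null|
// |  1|null|
```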

### Why are the changes needed?
To improve test coverage.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
By running the modified test suites:
```
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.ShowPartitionsSuite"
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.AlterTableAddPartitionSuite"
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.AlterTableDropPartitionSuite"
```

Closes #30893 from MaxGekk/partition-value-empty-string.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
Max Gekk, 2020-12-24 08:54:53 +00:00, committed by Wenchen Fan
commit 54a67842e6 (parent 3e9821edfd)
7 changed files with 92 additions and 3 deletions

@@ -17,6 +17,7 @@
 package org.apache.spark.sql.execution.command.v1
 
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.execution.command
@@ -35,6 +36,17 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuiteBase {
     val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head
     assert(location.endsWith(expected))
   }
+
+  test("empty string as partition value") {
+    withNamespaceAndTable("ns", "tbl") { t =>
+      sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)")
+      val errMsg = intercept[AnalysisException] {
+        sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')")
+      }.getMessage
+      assert(errMsg.contains("Partition spec is invalid. " +
+        "The spec ([p1=]) contains an empty partition column value"))
+    }
+  }
 }
 
 class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with CommandSuiteBase

@@ -17,6 +17,7 @@
 package org.apache.spark.sql.execution.command.v1
 
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.execution.command
 
 trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase {
@@ -35,4 +36,16 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase {
 class AlterTableDropPartitionSuite
   extends AlterTableDropPartitionSuiteBase
-  with CommandSuiteBase
+  with CommandSuiteBase {
+  test("empty string as partition value") {
+    withNamespaceAndTable("ns", "tbl") { t =>
+      sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)")
+      val errMsg = intercept[AnalysisException] {
+        sql(s"ALTER TABLE $t DROP PARTITION (p1 = '')")
+      }.getMessage
+      assert(errMsg.contains("Partition spec is invalid. " +
+        "The spec ([p1=]) contains an empty partition column value"))
+    }
+  }
+}

@@ -93,4 +93,23 @@ class ShowPartitionsSuite extends ShowPartitionsSuiteBase with CommandSuiteBase
       assert(sql("SHOW PARTITIONS part_datasrc").count() == 3)
     }
   }
+
+  test("null and empty string as partition values") {
+    import testImplicits._
+    withTable("t") {
+      val df = Seq((0, ""), (1, null)).toDF("a", "part")
+      df.write
+        .partitionBy("part")
+        .format("parquet")
+        .mode(SaveMode.Overwrite)
+        .saveAsTable("t")
+      runShowPartitionsSql(
+        "SHOW PARTITIONS t",
+        Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil)
+      checkAnswer(spark.table("t"),
+        Row(0, null) ::
+        Row(1, null) :: Nil)
+    }
+  }
 }

@@ -59,4 +59,12 @@ class AlterTableAddPartitionSuite
       assert(errMsg.contains(s"Table $t can not alter partitions"))
     }
  }
+
+  test("empty string as partition value") {
+    withNamespaceAndTable("ns", "tbl") { t =>
+      sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)")
+      sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')")
+      checkPartitions(t, Map("p1" -> ""))
+    }
+  }
 }
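
Unlike the v1 suite earlier in this commit, the v2 path accepts the empty string. The `checkPartitions` helper comes from the shared suite base and is not shown in this diff; a rough, hypothetical equivalent in plain SQL terms (assuming `spark` and the qualified table name `t` are in scope) could look like:
```scala
// Hypothetical stand-in for the suite's checkPartitions helper:
// list the table's partitions and compare against the expected specs.
def assertPartitions(table: String, expected: Set[String]): Unit = {
  val actual = spark.sql(s"SHOW PARTITIONS $table")
    .collect()
    .map(_.getString(0))
    .toSet
  assert(actual == expected, s"expected $expected but got $actual")
}

// After ADD PARTITION (p1 = ''), the empty value is listed as "p1=",
// matching the Row("part=") expectation in the v2 ShowPartitionsSuite below.
assertPartitions(t, Set("p1="))
```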

@@ -50,4 +50,13 @@ class AlterTableDropPartitionSuite
       }
     }
   }
+
+  test("empty string as partition value") {
+    withNamespaceAndTable("ns", "tbl") { t =>
+      sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)")
+      sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')")
+      sql(s"ALTER TABLE $t DROP PARTITION (p1 = '')")
+      checkPartitions(t)
+    }
+  }
 }

@@ -45,7 +45,13 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSuiteBase {
         .mode(SaveMode.Overwrite)
         .saveAsTable(t)
-      runShowPartitionsSql(s"SHOW PARTITIONS $t", Row("part=") :: Row("part=null") :: Nil)
+      runShowPartitionsSql(
+        s"SHOW PARTITIONS $t",
+        Row("part=") ::
+        Row("part=null") :: Nil)
+      checkAnswer(spark.table(t),
+        Row(0, "") ::
+        Row(1, null) :: Nil)
     }
   }
 }

@@ -17,6 +17,28 @@
 package org.apache.spark.sql.hive.execution.command
 
+import org.apache.spark.sql.{Row, SaveMode}
 import org.apache.spark.sql.execution.command.v1
 
-class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase
+class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase {
+  test("null and empty string as partition values") {
+    import testImplicits._
+    withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
+      withTable("t") {
+        val df = Seq((0, ""), (1, null)).toDF("a", "part")
+        df.write
+          .partitionBy("part")
+          .format("hive")
+          .mode(SaveMode.Overwrite)
+          .saveAsTable("t")
+        runShowPartitionsSql(
+          "SHOW PARTITIONS t",
+          Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil)
+        checkAnswer(spark.table("t"),
+          Row(0, "__HIVE_DEFAULT_PARTITION__") ::
+          Row(1, "__HIVE_DEFAULT_PARTITION__") :: Nil)
+      }
+    }
+  }
+}
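
Taken together, the `SHOW PARTITIONS` tests document a subtle asymmetry: the v2 in-memory catalog above lists `part=` and `part=null` separately, while both the parquet datasource table and the Hive table fold the two values into `__HIVE_DEFAULT_PARTITION__` on disk, yet the former reads them back as `null` and the latter as the literal string. A condensed sketch of that last contrast (assumes a Hive-enabled session; table names are illustrative):
```scala
// Same data written two ways; the read-back semantics differ.
import spark.implicits._
val df = Seq((0, ""), (1, null)).toDF("a", "part")

// Needed for the Hive write path, as in the test above.
spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict")

df.write.partitionBy("part").format("parquet").saveAsTable("t_ds")
df.write.partitionBy("part").format("hive").saveAsTable("t_hive")

spark.table("t_ds").show()   // part column: null, null
spark.table("t_hive").show() // part column: __HIVE_DEFAULT_PARTITION__, twice
```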