[SPARK-33453][SQL][TESTS] Unify v1 and v2 SHOW PARTITIONS tests

### What changes were proposed in this pull request?
1. Move `SHOW PARTITIONS` parsing tests to `ShowPartitionsParserSuite`
2. Move the Hive tests for `SHOW PARTITIONS` from `HiveCommandSuite` to the base test suite `v1.ShowPartitionsSuiteBase`. This allows running the tests with and without Hive.

The changes follow the approach of https://github.com/apache/spark/pull/30287.

### Why are the changes needed?
- The unification allows running common `SHOW PARTITIONS` tests for DSv1, Hive DSv1, and DSv2.
- We can detect missing features and differences between DSv1 and DSv2 implementations.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
By running:
- new test suites `build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowPartitionsSuite"`
- and the old one `build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly org.apache.spark.sql.hive.execution.HiveCommandSuite"`

Closes #30377 from MaxGekk/unify-dsv1_v2-show-partitions-tests.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Max Gekk 2020-11-16 16:11:42 +00:00 committed by Wenchen Fan
parent dfa6fb46f4
commit 6883f29465
10 changed files with 355 additions and 153 deletions

View file

@ -1629,32 +1629,6 @@ class DDLParserSuite extends AnalysisTest {
TruncateTableStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10"))))
}
test("SHOW PARTITIONS") {
  // Each entry pairs a statement with the plan the parser is expected to produce for it.
  val cases = Seq(
    "SHOW PARTITIONS t1" ->
      ShowPartitionsStatement(Seq("t1"), None),
    "SHOW PARTITIONS db1.t1" ->
      ShowPartitionsStatement(Seq("db1", "t1"), None),
    "SHOW PARTITIONS t1 PARTITION(partcol1='partvalue', partcol2='partvalue')" ->
      ShowPartitionsStatement(
        Seq("t1"),
        Some(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue"))),
    "SHOW PARTITIONS a.b.c" ->
      ShowPartitionsStatement(Seq("a", "b", "c"), None),
    "SHOW PARTITIONS a.b.c PARTITION(ds='2017-06-10')" ->
      ShowPartitionsStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10"))))

  // Parse every statement first, then compare the plans in declaration order.
  val parsedPlans = cases.map { case (statement, _) => parsePlan(statement) }
  cases.map(_._2).zip(parsedPlans).foreach { case (expectedPlan, actualPlan) =>
    comparePlans(actualPlan, expectedPlan)
  }
}
test("REFRESH TABLE") {
comparePlans(
parsePlan("REFRESH TABLE a.b.c"),

View file

@ -1909,21 +1909,6 @@ class DataSourceV2SQLSuite
}
}
test("SHOW PARTITIONS") {
  val tableName = "testcat.ns1.ns2.tbl"
  withTable(tableName) {
    val createTable =
      s"""
         |CREATE TABLE $tableName (id bigint, data string)
         |USING foo
         |PARTITIONED BY (id)
         |""".stripMargin
    sql(createTable)
    // Exercise the command both without and with a PARTITION spec; the actual check lives in
    // the suite's `testV1Command` helper, which is defined outside this block.
    Seq(tableName, s"$tableName PARTITION(id='1')").foreach { params =>
      testV1Command("SHOW PARTITIONS", params)
    }
  }
}
test("LOAD DATA INTO TABLE") {
val t = "testcat.ns1.ns2.tbl"
withTable(t) {

View file

@ -181,7 +181,6 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
sql(s"SHOW CREATE TABLE $viewName")
}.getMessage
assert(e3.contains(s"$viewName is a temp view not table or permanent view"))
assertNoSuchTable(s"SHOW PARTITIONS $viewName")
val e4 = intercept[AnalysisException] {
sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS")
}.getMessage

View file

@ -359,14 +359,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession {
assert(e.contains("Found duplicate keys 'a'"))
}
test("empty values in non-optional partition specs") {
  // Partition key `b` is written without a value; parsing is expected to fail on it.
  val statement = "SHOW PARTITIONS dbx.tab1 PARTITION (a='1', b)"
  val failure = intercept[ParseException] {
    parser.parsePlan(statement)
  }
  assert(failure.getMessage.contains("Found an empty partition key 'b'"))
}
test("Test CTAS #1") {
val s1 =
"""

View file

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.command
import org.apache.spark.sql.catalyst.analysis.AnalysisTest
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical.ShowPartitionsStatement
import org.apache.spark.sql.execution.SparkSqlParser
import org.apache.spark.sql.test.SharedSparkSession
/**
 * Parser-level tests for `SHOW PARTITIONS`: the statements below are only parsed and compared
 * against the expected [[ShowPartitionsStatement]]; nothing is executed.
 */
class ShowPartitionsParserSuite extends AnalysisTest with SharedSparkSession {

  test("SHOW PARTITIONS") {
    // statement -> plan the parser is expected to produce
    val expectations = Seq(
      "SHOW PARTITIONS t1" ->
        ShowPartitionsStatement(Seq("t1"), None),
      "SHOW PARTITIONS db1.t1" ->
        ShowPartitionsStatement(Seq("db1", "t1"), None),
      "SHOW PARTITIONS t1 PARTITION(partcol1='partvalue', partcol2='partvalue')" ->
        ShowPartitionsStatement(
          Seq("t1"),
          Some(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue"))),
      "SHOW PARTITIONS a.b.c" ->
        ShowPartitionsStatement(Seq("a", "b", "c"), None),
      "SHOW PARTITIONS a.b.c PARTITION(ds='2017-06-10')" ->
        ShowPartitionsStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10"))))

    for ((statement, expectedPlan) <- expectations) {
      comparePlans(parsePlan(statement), expectedPlan)
    }
  }

  test("empty values in non-optional partition specs") {
    // Partition key `b` is written without a value; parsing is expected to fail on it.
    val statement = "SHOW PARTITIONS dbx.tab1 PARTITION (a='1', b)"
    val failure = intercept[ParseException] {
      new SparkSqlParser().parsePlan(statement)
    }
    assert(failure.getMessage.contains("Found an empty partition key 'b'"))
  }
}

View file

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.command
import org.scalactic.source.Position
import org.scalatest.Tag
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SQLTestUtils
/**
 * Common base for `SHOW PARTITIONS` test suites. It declares the settings that a concrete
 * suite must provide and prefixes every registered test name with `SHOW PARTITIONS <version>: `.
 */
trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils {
  /** Label of the implementation under test; it becomes part of every test name. */
  protected def version: String

  // The three settings below are not used by this trait itself; concrete suites supply them.
  protected def catalog: String
  protected def defaultNamespace: Seq[String]
  protected def defaultUsing: String

  /** Registers `testFun` under a name prefixed with the command and the suite's `version`. */
  override def test(testName: String, testTags: Tag*)(testFun: => Any)
      (implicit pos: Position): Unit = {
    val qualifiedName = s"SHOW PARTITIONS $version: $testName"
    super.test(qualifiedName, testTags: _*)(testFun)
  }
}

View file

@ -0,0 +1,184 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.command.v1
import org.apache.spark.sql.{AnalysisException, Row, SaveMode}
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.connector.catalog.CatalogManager
import org.apache.spark.sql.execution.command
import org.apache.spark.sql.test.SharedSparkSession
/**
 * `SHOW PARTITIONS` tests shared by every suite that mixes this trait in. Tables are created
 * with the clause returned by `defaultUsing`, so a sub-suite can run the same tests against a
 * different provider by overriding it.
 */
trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase {
  override def version: String = "V1"
  override def catalog: String = CatalogManager.SESSION_CATALOG_NAME
  override def defaultNamespace: Seq[String] = Seq("default")
  override def defaultUsing: String = "USING parquet"

  /**
   * Creates `table` partitioned by (year, month) and inserts one row into each of the
   * partitions 2015/1, 2015/2, 2016/2 and 2016/3.
   */
  private def createDateTable(table: String): Unit = {
    val createTable =
      s"""
         |CREATE TABLE $table (price int, qty int, year int, month int)
         |$defaultUsing
         |partitioned by (year, month)""".stripMargin
    sql(createTable)
    // (year, month, price, qty) of each inserted row.
    Seq((2015, 1, 1, 1), (2015, 2, 2, 2), (2016, 2, 3, 3), (2016, 3, 3, 3)).foreach {
      case (year, month, price, qty) =>
        sql(s"INSERT INTO $table PARTITION(year = $year, month = $month) SELECT $price, $qty")
    }
  }

  test("show everything") {
    val table = "dateTable"
    withTable(table) {
      createDateTable(table)
      val allPartitions = Seq(
        Row("year=2015/month=1"),
        Row("year=2015/month=2"),
        Row("year=2016/month=2"),
        Row("year=2016/month=3"))
      // The bare and the `default`-qualified table name must list the same partitions.
      Seq(table, s"default.$table").foreach { name =>
        checkAnswer(sql(s"show partitions $name"), allPartitions)
      }
    }
  }

  test("filter by partitions") {
    val table = "dateTable"
    withTable(table) {
      createDateTable(table)
      // partition spec -> partitions expected to match it
      val expectedBySpec = Seq(
        "year=2015" -> Seq(Row("year=2015/month=1"), Row("year=2015/month=2")),
        "year=2015, month=1" -> Seq(Row("year=2015/month=1")),
        "month=2" -> Seq(Row("year=2015/month=2"), Row("year=2016/month=2")))
      expectedBySpec.foreach { case (spec, matching) =>
        checkAnswer(sql(s"show partitions default.$table PARTITION($spec)"), matching)
      }
    }
  }

  test("show everything more than 5 part keys") {
    val table = "wideTable"
    withTable(table) {
      val createTable =
        s"""
           |CREATE TABLE $table (
           | price int, qty int,
           | year int, month int, hour int, minute int, sec int, extra int)
           |$defaultUsing
           |PARTITIONED BY (year, month, hour, minute, sec, extra)""".stripMargin
      sql(createTable)
      // The two inserted partitions differ only in `month`.
      val months = Seq(3, 4)
      months.foreach { month =>
        val spec = s"year = 2016, month = $month, hour = 10, minute = 10, sec = 10, extra = 1"
        sql(s"""
          |INSERT INTO $table
          |PARTITION($spec) SELECT 3, 3
          |""".stripMargin)
      }
      val expected = months.map { month =>
        Row(s"year=2016/month=$month/hour=10/minute=10/sec=10/extra=1")
      }
      checkAnswer(sql(s"show partitions $table"), expected)
    }
  }

  test("non-partitioning columns") {
    val table = "dateTable"
    withTable(table) {
      createDateTable(table)
      // Neither `abcd` nor `xyz` is a partition column of the table.
      val failure = intercept[AnalysisException] {
        sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)")
      }
      assert(failure.getMessage.contains(
        "Non-partitioning column(s) [abcd, xyz] are specified"))
    }
  }

  test("show partitions of non-partitioned table") {
    val table = "not_partitioned_table"
    withTable(table) {
      sql(s"CREATE TABLE $table (col1 int) $defaultUsing")
      val failure = intercept[AnalysisException] {
        sql(s"SHOW PARTITIONS $table")
      }
      assert(failure.getMessage.contains("not allowed on a table that is not partitioned"))
    }
  }

  test("show partitions of a view") {
    val table = "dateTable"
    val view = "view1"
    withTable(table) {
      createDateTable(table)
      withView(view) {
        sql(s"CREATE VIEW $view as select * from $table")
        val failure = intercept[AnalysisException] {
          sql(s"SHOW PARTITIONS $view")
        }
        assert(failure.getMessage.contains("is not allowed on a view"))
      }
    }
  }

  test("show partitions of a temporary view") {
    val viewName = "test_view"
    withTempView(viewName) {
      spark.range(10).createTempView(viewName)
      val failure = intercept[NoSuchTableException] {
        sql(s"SHOW PARTITIONS $viewName")
      }
      assert(failure.getMessage.contains(s"Table or view '$viewName' not found"))
    }
  }
}
/**
 * Runs the tests inherited from `ShowPartitionsSuiteBase` on a `SharedSparkSession`, plus two
 * tests that are kept out of the shared base trait.
 */
class ShowPartitionsSuite extends ShowPartitionsSuiteBase with SharedSparkSession {

  // The test is placed here because it fails with `USING HIVE`:
  // org.apache.spark.sql.AnalysisException:
  //   Hive data source can only be used with tables, you can't use it with CREATE TEMP VIEW USING
  test("issue exceptions on the temporary view") {
    val viewName = "test_view"
    withTempView(viewName) {
      val createView =
        s"""
           |CREATE TEMPORARY VIEW $viewName (c1 INT, c2 STRING)
           |$defaultUsing""".stripMargin
      sql(createView)
      val failure = intercept[NoSuchTableException] {
        sql(s"SHOW PARTITIONS $viewName")
      }
      assert(failure.getMessage.contains(s"Table or view '$viewName' not found"))
    }
  }

  test("show partitions from a datasource") {
    import testImplicits._
    val table = "part_datasrc"
    withTable(table) {
      // Three rows with distinct values of the partition column `a`.
      val rows = for (i <- 1 to 3) yield (i, s"val_$i", i * 2)
      val writer = rows.toDF("a", "b", "c").write
        .partitionBy("a")
        .format("parquet")
        .mode(SaveMode.Overwrite)
      writer.saveAsTable(table)
      assert(sql(s"SHOW PARTITIONS $table").count() == 3)
    }
  }
}

View file

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.command.v2
import org.apache.spark.SparkConf
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.connector.InMemoryTableCatalog
import org.apache.spark.sql.execution.command
import org.apache.spark.sql.test.SharedSparkSession
/**
 * `SHOW PARTITIONS` tests for a table in a catalog backed by `InMemoryTableCatalog`, which is
 * registered under the name returned by `catalog`.
 */
class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSparkSession {
  override def version: String = "V2"
  override def catalog: String = "test_catalog"
  override def defaultNamespace: Seq[String] = Nil
  override def defaultUsing: String = "USING _"

  /** Adds the `spark.sql.catalog.<catalog>` entry pointing at `InMemoryTableCatalog`. */
  override def sparkConf: SparkConf = {
    val catalogImpl = classOf[InMemoryTableCatalog].getName
    super.sparkConf.set(s"spark.sql.catalog.$catalog", catalogImpl)
  }

  // TODO(SPARK-33452): Create a V2 SHOW PARTITIONS execution node
  test("not supported SHOW PARTITIONS") {
    // Asserts that running the command fails with the "only supported with v1 tables" error.
    def testV1Command(sqlCommand: String, sqlParams: String): Unit = {
      val failure = intercept[AnalysisException] {
        sql(s"$sqlCommand $sqlParams")
      }
      assert(failure.message.contains(s"$sqlCommand is only supported with v1 tables"))
    }

    val table = s"$catalog.ns1.ns2.tbl"
    withTable(table) {
      val createTable =
        s"""
           |CREATE TABLE $table (id bigint, data string)
           |$defaultUsing
           |PARTITIONED BY (id)
           |""".stripMargin
      sql(createTable)
      Seq(table, s"$table PARTITION(id='1')").foreach { params =>
        testV1Command("SHOW PARTITIONS", params)
      }
    }
  }
}

View file

@ -22,9 +22,8 @@ import java.io.File
import com.google.common.io.Files
import org.apache.hadoop.fs.{FileContext, FsConstants, Path}
import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.execution.command.LoadDataCommand
import org.apache.spark.sql.hive.test.TestHiveSingleton
@ -33,7 +32,6 @@ import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType
class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
import testImplicits._
protected override def beforeAll(): Unit = {
super.beforeAll()
@ -58,27 +56,11 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
|STORED AS PARQUET
|TBLPROPERTIES('prop1Key'="prop1Val", '`prop2Key`'="prop2Val")
""".stripMargin)
sql("CREATE TABLE parquet_tab3(col1 int, `col 2` int) USING hive")
sql("CREATE TABLE parquet_tab4 (price int, qty int) partitioned by (year int, month int)")
sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 1) SELECT 1, 1")
sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 2) SELECT 2, 2")
sql("INSERT INTO parquet_tab4 PARTITION(year = 2016, month = 2) SELECT 3, 3")
sql("INSERT INTO parquet_tab4 PARTITION(year = 2016, month = 3) SELECT 3, 3")
sql(
"""
|CREATE TABLE parquet_tab5 (price int, qty int)
|PARTITIONED BY (year int, month int, hour int, minute int, sec int, extra int)
""".stripMargin)
sql(
"""
|INSERT INTO parquet_tab5
|PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3
""".stripMargin)
sql(
"""
|INSERT INTO parquet_tab5
|PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3
""".stripMargin)
sql("CREATE VIEW parquet_view1 as select * from parquet_tab4")
}
@ -86,10 +68,8 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
try {
sql("DROP TABLE IF EXISTS parquet_tab1")
sql("DROP TABLE IF EXISTS parquet_tab2")
sql("DROP TABLE IF EXISTS parquet_tab3")
sql("DROP VIEW IF EXISTS parquet_view1")
sql("DROP TABLE IF EXISTS parquet_tab4")
sql("DROP TABLE IF EXISTS parquet_tab5")
} finally {
super.afterAll()
}
@ -393,88 +373,6 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
}
}
test("show partitions - show everything") {
  val allPartitions = Seq(
    Row("year=2015/month=1"),
    Row("year=2015/month=2"),
    Row("year=2016/month=2"),
    Row("year=2016/month=3"))
  // The bare and the `default`-qualified table name must list the same partitions.
  Seq("parquet_tab4", "default.parquet_tab4").foreach { name =>
    checkAnswer(sql(s"show partitions $name"), allPartitions)
  }
}
test("show partitions - show everything more than 5 part keys") {
  // The two expected partitions differ only in `month`.
  val expected = Seq(3, 4).map { month =>
    Row(s"year=2016/month=$month/hour=10/minute=10/sec=10/extra=1")
  }
  val listed = sql("show partitions parquet_tab5")
  checkAnswer(listed, expected)
}
test("show partitions - filter") {
  // partition spec -> partitions expected to match it
  val expectedBySpec = Seq(
    "year=2015" -> Seq(Row("year=2015/month=1"), Row("year=2015/month=2")),
    "year=2015, month=1" -> Seq(Row("year=2015/month=1")),
    "month=2" -> Seq(Row("year=2015/month=2"), Row("year=2016/month=2")))
  expectedBySpec.foreach { case (spec, matching) =>
    checkAnswer(sql(s"show partitions default.parquet_tab4 PARTITION($spec)"), matching)
  }
}
// Checks four failure cases of SHOW PARTITIONS in sequence: a session temporary view, a table
// without partitions, a partition spec naming unknown columns, and a view.
test("show partitions - empty row") {
withTempView("parquet_temp") {
sql(
"""
|CREATE TEMPORARY VIEW parquet_temp (c1 INT, c2 STRING)
|USING org.apache.spark.sql.parquet.DefaultSource
""".stripMargin)
// No rows are returned for a session temporary view: the command is expected to fail with
// NoSuchTableException instead.
intercept[NoSuchTableException] {
sql("SHOW PARTITIONS parquet_temp")
}
// A table that has no partitions is rejected.
val message1 = intercept[AnalysisException] {
sql("SHOW PARTITIONS parquet_tab3")
}.getMessage
assert(message1.contains("not allowed on a table that is not partitioned"))
// Columns named in the PARTITION spec that are not partition columns are rejected.
val message2 = intercept[AnalysisException] {
sql("SHOW PARTITIONS parquet_tab4 PARTITION(abcd=2015, xyz=1)")
}.getMessage
assert(message2.contains("Non-partitioning column(s) [abcd, xyz] are specified"))
// A view is rejected as well.
val message3 = intercept[AnalysisException] {
sql("SHOW PARTITIONS parquet_view1")
}.getMessage
assert(message3.contains("is not allowed on a view"))
}
}
test("show partitions - datasource") {
  val table = "part_datasrc"
  withTable(table) {
    // Three rows with distinct values of the partition column `a`.
    val rows = for (i <- 1 to 3) yield (i, s"val_$i", i * 2)
    val writer = rows.toDF("a", "b", "c").write
      .partitionBy("a")
      .format("parquet")
      .mode(SaveMode.Overwrite)
    writer.saveAsTable(table)
    assert(sql(s"SHOW PARTITIONS $table").count() == 3)
  }
}
test("SPARK-25918: LOAD DATA LOCAL INPATH should handle a relative path") {
val localFS = FileContext.getLocalFSFileContext()
val workingDir = localFS.getWorkingDirectory

View file

@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.hive.execution.command
import org.apache.spark.sql.execution.command.v1
import org.apache.spark.sql.hive.test.TestHiveSingleton
/**
 * Runs the tests inherited from `v1.ShowPartitionsSuiteBase` with `TestHiveSingleton` mixed in,
 * overriding only the suite label and the `USING` clause.
 */
class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with TestHiveSingleton {

  /** Provider clause returned in place of the one defined by the base trait. */
  override def defaultUsing: String = "USING HIVE"

  /** Suite label returned in place of the one defined by the base trait. */
  override def version: String = "Hive V1"
}