diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
index 3da66afced..2d890118ae 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
@@ -73,7 +73,8 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo
         className = table.provider.get,
         bucketSpec = table.bucketSpec,
         options = table.storage.properties ++ pathOption,
-        catalogTable = Some(tableWithDefaultOptions)).resolveRelation()
+        // As discussed in SPARK-19583, we don't check if the location exists
+        catalogTable = Some(tableWithDefaultOptions)).resolveRelation(checkFilesExist = false)
 
     val partitionColumnNames = if (table.schema.nonEmpty) {
       table.partitionColumnNames
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 5f70a8ce89..0666f446f3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -230,7 +230,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
   }
 
   private def getDBPath(dbName: String): URI = {
-    val warehousePath = makeQualifiedPath(s"${spark.sessionState.conf.warehousePath}")
+    val warehousePath = makeQualifiedPath(spark.sessionState.conf.warehousePath)
     new Path(CatalogUtils.URIToString(warehousePath), s"$dbName.db").toUri
   }
 
@@ -1899,7 +1899,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
     }
   }
 
-  test("insert data to a data source table which has a not existed location should succeed") {
+  test("insert data into a data source table with a non-existing location should succeed") {
    withTable("t") {
      withTempDir { dir =>
        spark.sql(
@@ -1939,7 +1939,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
     }
   }
 
-  test("insert into a data source table with no existed partition location should succeed") {
+  test("insert into a data source table with a non-existing partition location should succeed") {
    withTable("t") {
      withTempDir { dir =>
        spark.sql(
@@ -1966,7 +1966,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
     }
   }
 
-  test("read data from a data source table which has a not existed location should succeed") {
+  test("read data from a data source table with a non-existing location should succeed") {
    withTable("t") {
      withTempDir { dir =>
        spark.sql(
@@ -1994,7 +1994,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
     }
   }
 
-  test("read data from a data source table with no existed partition location should succeed") {
+  test("read data from a data source table with a non-existing partition location should succeed") {
    withTable("t") {
      withTempDir { dir =>
        spark.sql(
@@ -2016,48 +2016,72 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
     }
   }
 
+  test("create data source table with a non-existing location") {
+    withTable("t", "t1") {
+      withTempPath { dir =>
+        spark.sql(s"CREATE TABLE t(a int, b int) USING parquet LOCATION '$dir'")
+
+        val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
+        assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
+
+        spark.sql("INSERT INTO TABLE t SELECT 1, 2")
+        assert(dir.exists())
+
+        checkAnswer(spark.table("t"), Row(1, 2))
+      }
+      // partition table
+      withTempPath { dir =>
+        spark.sql(s"CREATE TABLE t1(a int, b int) USING parquet PARTITIONED BY(a) LOCATION '$dir'")
+
+        val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
+        assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
+
+        spark.sql("INSERT INTO TABLE t1 PARTITION(a=1) SELECT 2")
+
+        val partDir = new File(dir, "a=1")
+        assert(partDir.exists())
+
+        checkAnswer(spark.table("t1"), Row(2, 1))
+      }
+    }
+  }
+
   Seq(true, false).foreach { shouldDelete =>
-    val tcName = if (shouldDelete) "non-existent" else "existed"
+    val tcName = if (shouldDelete) "non-existing" else "existing"
     test(s"CTAS for external data source table with a $tcName location") {
       withTable("t", "t1") {
-        withTempDir {
-          dir =>
-            if (shouldDelete) {
-              dir.delete()
-            }
-            spark.sql(
-              s"""
-                 |CREATE TABLE t
-                 |USING parquet
-                 |LOCATION '$dir'
-                 |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d
-               """.stripMargin)
-            val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
-            assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
+        withTempDir { dir =>
+          if (shouldDelete) dir.delete()
+          spark.sql(
+            s"""
+               |CREATE TABLE t
+               |USING parquet
+               |LOCATION '$dir'
+               |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d
+             """.stripMargin)
+          val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
+          assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
 
-            checkAnswer(spark.table("t"), Row(3, 4, 1, 2))
+          checkAnswer(spark.table("t"), Row(3, 4, 1, 2))
         }
         // partition table
-        withTempDir {
-          dir =>
-            if (shouldDelete) {
-              dir.delete()
-            }
-            spark.sql(
-              s"""
-                 |CREATE TABLE t1
-                 |USING parquet
-                 |PARTITIONED BY(a, b)
-                 |LOCATION '$dir'
-                 |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d
-               """.stripMargin)
-            val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
-            assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
+        withTempDir { dir =>
+          if (shouldDelete) dir.delete()
+          spark.sql(
+            s"""
+               |CREATE TABLE t1
+               |USING parquet
+               |PARTITIONED BY(a, b)
+               |LOCATION '$dir'
+               |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d
+             """.stripMargin)
+          val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
+          assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
 
-            val partDir = new File(dir, "a=3")
-            assert(partDir.exists())
+          val partDir = new File(dir, "a=3")
+          assert(partDir.exists())
 
-            checkAnswer(spark.table("t1"), Row(1, 2, 3, 4))
+          checkAnswer(spark.table("t1"), Row(1, 2, 3, 4))
         }
       }
     }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 79ad156c55..d29242bb47 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -1663,43 +1663,73 @@ class HiveDDLSuite
     }
   }
 
+  test("create hive table with a non-existing location") {
+    withTable("t", "t1") {
+      withTempPath { dir =>
+        spark.sql(s"CREATE TABLE t(a int, b int) USING hive LOCATION '$dir'")
+
+        val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
+        assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
+
+        spark.sql("INSERT INTO TABLE t SELECT 1, 2")
+        assert(dir.exists())
+
+        checkAnswer(spark.table("t"), Row(1, 2))
+      }
+      // partition table
+      withTempPath { dir =>
+        spark.sql(
+          s"""
+             |CREATE TABLE t1(a int, b int)
+             |USING hive
+             |PARTITIONED BY(a)
+             |LOCATION '$dir'
+           """.stripMargin)
+
+        val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
+        assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
+
+        spark.sql("INSERT INTO TABLE t1 PARTITION(a=1) SELECT 2")
+
+        val partDir = new File(dir, "a=1")
+        assert(partDir.exists())
+
+        checkAnswer(spark.table("t1"), Row(2, 1))
+      }
+    }
+  }
+
   Seq(true, false).foreach { shouldDelete =>
-    val tcName = if (shouldDelete) "non-existent" else "existed"
-    test(s"CTAS for external data source table with a $tcName location") {
+    val tcName = if (shouldDelete) "non-existing" else "existing"
+
+    test(s"CTAS for external hive table with a $tcName location") {
       withTable("t", "t1") {
-        withTempDir {
-          dir =>
-            if (shouldDelete) {
-              dir.delete()
-            }
+        withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
+          withTempDir { dir =>
+            if (shouldDelete) dir.delete()
             spark.sql(
               s"""
                 |CREATE TABLE t
-                |USING parquet
+                |USING hive
                 |LOCATION '$dir'
                 |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d
               """.stripMargin)
             val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
             assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
 
             checkAnswer(spark.table("t"), Row(3, 4, 1, 2))
-        }
-        // partition table
-        withTempDir {
-          dir =>
-            if (shouldDelete) {
-              dir.delete()
-            }
+          }
+          // partition table
+          withTempDir { dir =>
+            if (shouldDelete) dir.delete()
             spark.sql(
               s"""
                 |CREATE TABLE t1
-                |USING parquet
+                |USING hive
                 |PARTITIONED BY(a, b)
                 |LOCATION '$dir'
                 |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d
               """.stripMargin)
             val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
             assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
@@ -1707,51 +1737,6 @@ class HiveDDLSuite
             assert(partDir.exists())
 
             checkAnswer(spark.table("t1"), Row(1, 2, 3, 4))
-        }
-      }
-    }
-
-    test(s"CTAS for external hive table with a $tcName location") {
-      withTable("t", "t1") {
-        withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
-          withTempDir {
-            dir =>
-              if (shouldDelete) {
-                dir.delete()
-              }
-              spark.sql(
-                s"""
-                  |CREATE TABLE t
-                  |USING hive
-                  |LOCATION '$dir'
-                  |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d
-                """.stripMargin)
-              val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
-              assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
-
-              checkAnswer(spark.table("t"), Row(3, 4, 1, 2))
-          }
-          // partition table
-          withTempDir {
-            dir =>
-              if (shouldDelete) {
-                dir.delete()
-              }
-              spark.sql(
-                s"""
-                  |CREATE TABLE t1
-                  |USING hive
-                  |PARTITIONED BY(a, b)
-                  |LOCATION '$dir'
-                  |AS SELECT 3 as a, 4 as b, 1 as c, 2 as d
-                """.stripMargin)
-              val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
-              assert(table.location == makeQualifiedPath(dir.getAbsolutePath))
-
-              val partDir = new File(dir, "a=3")
-              assert(partDir.exists())
-
-              checkAnswer(spark.table("t1"), Row(1, 2, 3, 4))
       }
     }
   }
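
Editor's note (illustration, not part of the patch): the functional change above is the new
checkFilesExist = false argument passed to resolveRelation(), which keeps
CREATE TABLE ... LOCATION from failing when the target directory does not yet exist; the
added tests then verify that the directory is created on first write. A minimal sketch of the
resulting behavior in a spark-shell session, assuming a SparkSession named `spark`; the table
name and scratch path below are illustrative, not taken from the patch:

    // The location does not exist yet; without this change, resolveRelation()
    // would eagerly check the path and reject the CREATE TABLE.
    spark.sql("CREATE TABLE t(a INT, b INT) USING parquet LOCATION '/tmp/t_not_yet_created'")

    // The first INSERT creates the directory and writes the data files.
    spark.sql("INSERT INTO TABLE t SELECT 1, 2")

    // Reads now succeed against the freshly created location.
    spark.table("t").show()   // expect a single row: 1, 2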