diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 2617add6ff..b315148272 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable
 import org.apache.spark.sql.catalyst.expressions.{Alias, SubqueryExpression}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View}
-import org.apache.spark.sql.types.MetadataBuilder
+import org.apache.spark.sql.types.{MetadataBuilder, StructType}
 import org.apache.spark.sql.util.SchemaUtils
 
 /**
@@ -236,14 +236,15 @@ case class CreateViewCommand(
       throw new AnalysisException(
         "It is not allowed to create a persisted view from the Dataset API")
     }
-
-    val newProperties = generateViewProperties(properties, session, analyzedPlan)
+    val aliasedSchema = aliasPlan(session, analyzedPlan).schema
+    val newProperties = generateViewProperties(
+      properties, session, analyzedPlan, aliasedSchema.fieldNames)
 
     CatalogTable(
       identifier = name,
       tableType = CatalogTableType.VIEW,
       storage = CatalogStorageFormat.empty,
-      schema = aliasPlan(session, analyzedPlan).schema,
+      schema = aliasedSchema,
       properties = newProperties,
       viewOriginalText = originalText,
       viewText = originalText,
@@ -298,7 +299,8 @@ case class AlterViewAsCommand(
     val viewIdent = viewMeta.identifier
     checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent)
 
-    val newProperties = generateViewProperties(viewMeta.properties, session, analyzedPlan)
+    val newProperties = generateViewProperties(
+      viewMeta.properties, session, analyzedPlan, analyzedPlan.schema.fieldNames)
 
     val updatedViewMeta = viewMeta.copy(
       schema = analyzedPlan.schema,
@@ -360,13 +362,15 @@ object ViewHelper {
   def generateViewProperties(
       properties: Map[String, String],
       session: SparkSession,
-      analyzedPlan: LogicalPlan): Map[String, String] = {
+      analyzedPlan: LogicalPlan,
+      fieldNames: Array[String]): Map[String, String] = {
+    // For CreateViewCommand, queryOutput may be different from fieldNames.
     val queryOutput = analyzedPlan.schema.fieldNames
 
     // Generate the query column names, throw an AnalysisException if there exists duplicate column
     // names.
     SchemaUtils.checkColumnNameDuplication(
-      queryOutput, "in the view definition", session.sessionState.conf.resolver)
+      fieldNames, "in the view definition", session.sessionState.conf.resolver)
 
     // Generate the view default database name.
     val viewDefaultDatabase = session.sessionState.catalog.getCurrentDatabase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index 347a14c912..ff84b05713 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -710,4 +710,14 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
       }
     }
   }
+
+  test("SPARK-23519 view should be created even when query output contains duplicate col name") {
+    withTable("t23519") {
+      withView("v23519") {
+        sql("CREATE TABLE t23519 USING parquet AS SELECT 1 AS c1")
+        sql("CREATE VIEW v23519 (c1, c2) AS SELECT c1, c1 FROM t23519")
+        checkAnswer(sql("SELECT * FROM v23519"), Row(1, 1))
+      }
+    }
+  }
 }
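
For context, a minimal sketch of the scenario this patch addresses (the table and view names are illustrative, and a SparkSession named `spark` is assumed). The query output of SELECT c1, c1 contains a duplicate column name, but the view's explicit column list aliases it to distinct names, so the duplicate-name check should run against the aliased schema rather than the raw query output:

    // Sketch only; `spark` and the names t23519/v23519 are illustrative.
    spark.sql("CREATE TABLE t23519 USING parquet AS SELECT 1 AS c1")

    // The query output is (c1, c1), but the view's column list aliases it to
    // (c1, c2). Before this patch, CreateViewCommand ran the duplicate-column
    // check on the raw query output and threw an AnalysisException; with the
    // patch it checks the aliased field names, so the view is created.
    spark.sql("CREATE VIEW v23519 (c1, c2) AS SELECT c1, c1 FROM t23519")

    spark.sql("SELECT * FROM v23519").show()  // one row: 1, 1

Note that AlterViewAsCommand passes analyzedPlan.schema.fieldNames, which leaves its behavior unchanged; only CreateViewCommand supplies the aliased schema.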