[SPARK-23519][SQL] create view should work from query with duplicate output columns

**What changes were proposed in this pull request?**

Moving the call to checkColumnNameDuplication out of generateViewProperties. This way we can choose whether checkColumnNameDuplication is performed on the analyzed or the aliased plan, without having to pass an additional argument (aliasedPlan) to generateViewProperties.

Before this PR, the column name duplication check was performed on the query output of the SQL below (c1, c1); this PR makes the check run against the user-provided schema of the view definition (c1, c2) instead.

**Why are the changes needed?**

These changes fix the SPARK-23519 bug. The queries below would previously cause an exception; this PR fixes them and also adds a test case.

`CREATE TABLE t23519 AS SELECT 1 AS c1
CREATE VIEW v23519 (c1, c2) AS SELECT c1, c1 FROM t23519`

**Does this PR introduce any user-facing change?**
No

**How was this patch tested?**
A new unit test was added in SQLViewSuite.

Closes #25570 from hem1891/SPARK-23519.

Lead-authored-by: hemanth meka <hmeka@tibco.com>
Co-authored-by: hem1891 <hem1891@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
hemanth meka 2019-08-28 12:11:10 +08:00 committed by Wenchen Fan
parent 8848af2635
commit 6252c54e39
2 changed files with 21 additions and 7 deletions

View file

@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable
import org.apache.spark.sql.catalyst.expressions.{Alias, SubqueryExpression}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View}
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.sql.types.{MetadataBuilder, StructType}
import org.apache.spark.sql.util.SchemaUtils
@ -236,14 +236,15 @@ case class CreateViewCommand(
throw new AnalysisException(
"It is not allowed to create a persisted view from the Dataset API")
}
val newProperties = generateViewProperties(properties, session, analyzedPlan)
val aliasedSchema = aliasPlan(session, analyzedPlan).schema
val newProperties = generateViewProperties(
properties, session, analyzedPlan, aliasedSchema.fieldNames)
CatalogTable(
identifier = name,
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = aliasPlan(session, analyzedPlan).schema,
schema = aliasedSchema,
properties = newProperties,
viewOriginalText = originalText,
viewText = originalText,
@ -298,7 +299,8 @@ case class AlterViewAsCommand(
val viewIdent = viewMeta.identifier
checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent)
val newProperties = generateViewProperties(viewMeta.properties, session, analyzedPlan)
val newProperties = generateViewProperties(
viewMeta.properties, session, analyzedPlan, analyzedPlan.schema.fieldNames)
val updatedViewMeta = viewMeta.copy(
schema = analyzedPlan.schema,
@ -360,13 +362,15 @@ object ViewHelper {
def generateViewProperties(
properties: Map[String, String],
session: SparkSession,
analyzedPlan: LogicalPlan): Map[String, String] = {
analyzedPlan: LogicalPlan,
fieldNames: Array[String]): Map[String, String] = {
// for createViewCommand queryOutput may be different from fieldNames
val queryOutput = analyzedPlan.schema.fieldNames
// Generate the query column names, throw an AnalysisException if there exists duplicate column
// names.
SchemaUtils.checkColumnNameDuplication(
queryOutput, "in the view definition", session.sessionState.conf.resolver)
fieldNames, "in the view definition", session.sessionState.conf.resolver)
// Generate the view default database name.
val viewDefaultDatabase = session.sessionState.catalog.getCurrentDatabase

View file

@ -710,4 +710,14 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
}
}
}
// SPARK-23519: creating a view must succeed even when the underlying query
// produces duplicate output column names, provided the user-supplied view
// column list is itself free of duplicates.
test("SPARK-23519 view should be created even when query output contains duplicate col name") {
withTable("t23519") {
withView("v23519") {
sql("CREATE TABLE t23519 USING parquet AS SELECT 1 AS c1")
// Query output is (c1, c1); the explicit view column list aliases it to (c1, c2),
// so the duplication check on the view schema passes.
sql("CREATE VIEW v23519 (c1, c2) AS SELECT c1, c1 FROM t23519")
checkAnswer(sql("SELECT * FROM v23519"), Row(1, 1))
}
}
}
}