[SPARK-30614][SQL] The native ALTER COLUMN syntax should change one property at a time

### What changes were proposed in this pull request?

The current ALTER COLUMN syntax allows to change multiple properties at a time:
```
ALTER TABLE table=multipartIdentifier
  (ALTER | CHANGE) COLUMN? column=multipartIdentifier
  (TYPE dataType)?
  (COMMENT comment=STRING)?
  colPosition?
```
The SQL standard (section 11.12) only allows changing one property at a time. This is also true on other recent SQL systems like [snowflake](https://docs.snowflake.net/manuals/sql-reference/sql/alter-table-column.html) and [redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE.html). (credit to cloud-fan)

This PR proposes to change ALTER COLUMN to follow SQL standard, thus allows altering only one column property at a time.

Note that ALTER COLUMN syntax being changed here is newly added in Spark 3.0, so it doesn't affect Spark 2.4 behavior.

### Why are the changes needed?

To follow SQL standard (and other recent SQL systems) behavior.

### Does this PR introduce any user-facing change?

Yes, now the user can update the column properties only one at a time.

For example,
```
ALTER TABLE table1 ALTER COLUMN a.b.c TYPE bigint COMMENT 'new comment'
```
should be broken into
```
ALTER TABLE table1 ALTER COLUMN a.b.c TYPE bigint
ALTER TABLE table1 ALTER COLUMN a.b.c COMMENT 'new comment'
```

### How was this patch tested?

Updated existing tests.

Closes #27444 from imback82/alter_column_one_at_a_time.

Authored-by: Terry Kim <yuminkim@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Terry Kim 2020-02-08 02:47:44 +08:00 committed by Wenchen Fan
parent a3e77773cf
commit a7451f44d2
9 changed files with 167 additions and 108 deletions

View file

@ -158,12 +158,9 @@ statement
SET TBLPROPERTIES tablePropertyList #setTableProperties
| ALTER (TABLE | VIEW) multipartIdentifier
UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList #unsetTableProperties
| ALTER TABLE table=multipartIdentifier
|ALTER TABLE table=multipartIdentifier
(ALTER | CHANGE) COLUMN? column=multipartIdentifier
(TYPE dataType)? commentSpec? colPosition? #alterTableColumn
| ALTER TABLE table=multipartIdentifier
ALTER COLUMN? column=multipartIdentifier
setOrDrop=(SET | DROP) NOT NULL #alterColumnNullability
alterColumnAction? #alterTableAlterColumn
| ALTER TABLE table=multipartIdentifier partitionSpec?
CHANGE COLUMN?
colName=multipartIdentifier colType colPosition? #hiveChangeColumn
@ -983,6 +980,13 @@ number
| MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
;
alterColumnAction
: TYPE dataType
| commentSpec
| colPosition
| setOrDrop=(SET | DROP) NOT NULL
;
// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
// - Reserved keywords:
// Keywords that are reserved and can't be used as identifiers for table, view, column,

View file

@ -2940,55 +2940,61 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
}
/**
* Parse a [[AlterTableAlterColumnStatement]] command.
* Parse a [[AlterTableAlterColumnStatement]] command to alter a column's property.
*
* For example:
* {{{
* ALTER TABLE table1 ALTER COLUMN a.b.c TYPE bigint
* ALTER TABLE table1 ALTER COLUMN a.b.c TYPE bigint COMMENT 'new comment'
* ALTER TABLE table1 ALTER COLUMN a.b.c SET NOT NULL
* ALTER TABLE table1 ALTER COLUMN a.b.c DROP NOT NULL
* ALTER TABLE table1 ALTER COLUMN a.b.c COMMENT 'new comment'
* ALTER TABLE table1 ALTER COLUMN a.b.c FIRST
* ALTER TABLE table1 ALTER COLUMN a.b.c AFTER x
* }}}
*/
override def visitAlterTableColumn(
ctx: AlterTableColumnContext): LogicalPlan = withOrigin(ctx) {
val verb = if (ctx.CHANGE != null) "CHANGE" else "ALTER"
if (ctx.dataType == null && ctx.commentSpec() == null && ctx.colPosition == null) {
override def visitAlterTableAlterColumn(
ctx: AlterTableAlterColumnContext): LogicalPlan = withOrigin(ctx) {
val action = ctx.alterColumnAction
if (action == null) {
val verb = if (ctx.CHANGE != null) "CHANGE" else "ALTER"
operationNotAllowed(
s"ALTER TABLE table $verb COLUMN requires a TYPE or a COMMENT or a FIRST/AFTER", ctx)
s"ALTER TABLE table $verb COLUMN requires a TYPE, a SET/DROP, a COMMENT, or a FIRST/AFTER",
ctx)
}
val dataType = if (action.dataType != null) {
Some(typedVisit[DataType](action.dataType))
} else {
None
}
val nullable = if (action.setOrDrop != null) {
action.setOrDrop.getType match {
case SqlBaseParser.SET => Some(false)
case SqlBaseParser.DROP => Some(true)
}
} else {
None
}
val comment = if (action.commentSpec != null) {
Some(visitCommentSpec(action.commentSpec()))
} else {
None
}
val position = if (action.colPosition != null) {
Some(typedVisit[ColumnPosition](action.colPosition))
} else {
None
}
assert(Seq(dataType, nullable, comment, position).count(_.nonEmpty) == 1)
AlterTableAlterColumnStatement(
visitMultipartIdentifier(ctx.table),
typedVisit[Seq[String]](ctx.column),
dataType = Option(ctx.dataType).map(typedVisit[DataType]),
nullable = None,
comment = Option(ctx.commentSpec()).map(visitCommentSpec),
position = Option(ctx.colPosition).map(typedVisit[ColumnPosition]))
}
/**
* Parse a [[AlterTableAlterColumnStatement]] command to change column nullability.
*
* For example:
* {{{
* ALTER TABLE table1 ALTER COLUMN a.b.c SET NOT NULL
* ALTER TABLE table1 ALTER COLUMN a.b.c DROP NOT NULL
* }}}
*/
override def visitAlterColumnNullability(ctx: AlterColumnNullabilityContext): LogicalPlan = {
withOrigin(ctx) {
val nullable = ctx.setOrDrop.getType match {
case SqlBaseParser.SET => false
case SqlBaseParser.DROP => true
}
AlterTableAlterColumnStatement(
visitMultipartIdentifier(ctx.table),
typedVisit[Seq[String]](ctx.column),
dataType = None,
nullable = Some(nullable),
comment = None,
position = None)
}
dataType = dataType,
nullable = nullable,
comment = comment,
position = position)
}
/**

View file

@ -646,17 +646,18 @@ class DDLParserSuite extends AnalysisTest {
Some(first())))
}
test("alter table: update column type, comment and position") {
comparePlans(
parsePlan("ALTER TABLE table_name CHANGE COLUMN a.b.c " +
"TYPE bigint COMMENT 'new comment' AFTER d"),
AlterTableAlterColumnStatement(
Seq("table_name"),
Seq("a", "b", "c"),
Some(LongType),
None,
Some("new comment"),
Some(after("d"))))
test("alter table: mutiple property changes are not allowed") {
intercept[ParseException] {
parsePlan("ALTER TABLE table_name ALTER COLUMN a.b.c " +
"TYPE bigint COMMENT 'new comment'")}
intercept[ParseException] {
parsePlan("ALTER TABLE table_name ALTER COLUMN a.b.c " +
"TYPE bigint COMMENT AFTER d")}
intercept[ParseException] {
parsePlan("ALTER TABLE table_name ALTER COLUMN a.b.c " +
"TYPE bigint COMMENT 'new comment' AFTER d")}
}
test("alter table: SET/DROP NOT NULL") {

View file

@ -77,10 +77,6 @@ class ResolveSessionCatalog(
throw new AnalysisException(
"ALTER COLUMN with qualified column is only supported with v2 tables.")
}
if (a.dataType.isEmpty) {
throw new AnalysisException(
"ALTER COLUMN with v1 tables must specify new data type.")
}
if (a.nullable.isDefined) {
throw new AnalysisException(
"ALTER COLUMN with v1 tables cannot specify NOT NULL.")
@ -92,17 +88,27 @@ class ResolveSessionCatalog(
val builder = new MetadataBuilder
// Add comment to metadata
a.comment.map(c => builder.putString("comment", c))
val colName = a.column(0)
val dataType = a.dataType.getOrElse {
v1Table.schema.findNestedField(Seq(colName), resolver = conf.resolver)
.map(_._2.dataType)
.getOrElse {
throw new AnalysisException(
s"ALTER COLUMN cannot find column ${quote(colName)} in v1 table. " +
s"Available: ${v1Table.schema.fieldNames.mkString(", ")}")
}
}
// Add Hive type string to metadata.
val cleanedDataType = HiveStringType.replaceCharType(a.dataType.get)
if (a.dataType.get != cleanedDataType) {
builder.putString(HIVE_TYPE_STRING, a.dataType.get.catalogString)
val cleanedDataType = HiveStringType.replaceCharType(dataType)
if (dataType != cleanedDataType) {
builder.putString(HIVE_TYPE_STRING, dataType.catalogString)
}
val newColumn = StructField(
a.column(0),
colName,
cleanedDataType,
nullable = true,
builder.build())
AlterTableChangeColumnCommand(tbl.asTableIdentifier, a.column(0), newColumn)
AlterTableChangeColumnCommand(tbl.asTableIdentifier, colName, newColumn)
}.getOrElse {
val colName = a.column.toArray
val typeChange = a.dataType.map { newDataType =>

View file

@ -15,29 +15,34 @@ ALTER TABLE test_change CHANGE a TYPE STRING;
DESC test_change;
-- Change column position (not supported yet)
ALTER TABLE test_change CHANGE a TYPE INT AFTER b;
ALTER TABLE test_change CHANGE b TYPE STRING FIRST;
ALTER TABLE test_change CHANGE a AFTER b;
ALTER TABLE test_change CHANGE b FIRST;
DESC test_change;
-- Change column comment
ALTER TABLE test_change CHANGE a TYPE INT COMMENT 'this is column a';
ALTER TABLE test_change CHANGE b TYPE STRING COMMENT '#*02?`';
ALTER TABLE test_change CHANGE c TYPE INT COMMENT '';
ALTER TABLE test_change CHANGE a COMMENT 'this is column a';
ALTER TABLE test_change CHANGE b COMMENT '#*02?`';
ALTER TABLE test_change CHANGE c COMMENT '';
DESC test_change;
-- Don't change anything.
ALTER TABLE test_change CHANGE a TYPE INT COMMENT 'this is column a';
ALTER TABLE test_change CHANGE a TYPE INT;
ALTER TABLE test_change CHANGE a COMMENT 'this is column a';
DESC test_change;
-- Change a invalid column
ALTER TABLE test_change CHANGE invalid_col TYPE INT;
DESC test_change;
-- Check case insensitivity.
ALTER TABLE test_change CHANGE A COMMENT 'case insensitivity';
DESC test_change;
-- Change column can't apply to a temporary/global_temporary view
CREATE TEMPORARY VIEW temp_view(a, b) AS SELECT 1, "one";
ALTER TABLE temp_view CHANGE a TYPE INT COMMENT 'this is column a';
ALTER TABLE temp_view CHANGE a TYPE INT;
CREATE GLOBAL TEMPORARY VIEW global_temp_view(a, b) AS SELECT 1, "one";
ALTER TABLE global_temp.global_temp_view CHANGE a TYPE INT COMMENT 'this is column a';
ALTER TABLE global_temp.global_temp_view CHANGE a TYPE INT;
-- DROP TEST TABLE
DROP TABLE test_change;

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 25
-- Number of queries: 28
-- !query
@ -27,7 +27,7 @@ struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
Operation not allowed: ALTER TABLE table CHANGE COLUMN requires a TYPE or a COMMENT or a FIRST/AFTER(line 1, pos 0)
Operation not allowed: ALTER TABLE table CHANGE COLUMN requires a TYPE, a SET/DROP, a COMMENT, or a FIRST/AFTER(line 1, pos 0)
== SQL ==
ALTER TABLE test_change CHANGE a
@ -83,7 +83,7 @@ c int
-- !query
ALTER TABLE test_change CHANGE a TYPE INT AFTER b
ALTER TABLE test_change CHANGE a AFTER b
-- !query schema
struct<>
-- !query output
@ -92,7 +92,7 @@ ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.;
-- !query
ALTER TABLE test_change CHANGE b TYPE STRING FIRST
ALTER TABLE test_change CHANGE b FIRST
-- !query schema
struct<>
-- !query output
@ -111,7 +111,7 @@ c int
-- !query
ALTER TABLE test_change CHANGE a TYPE INT COMMENT 'this is column a'
ALTER TABLE test_change CHANGE a COMMENT 'this is column a'
-- !query schema
struct<>
-- !query output
@ -119,7 +119,7 @@ struct<>
-- !query
ALTER TABLE test_change CHANGE b TYPE STRING COMMENT '#*02?`'
ALTER TABLE test_change CHANGE b COMMENT '#*02?`'
-- !query schema
struct<>
-- !query output
@ -127,7 +127,7 @@ struct<>
-- !query
ALTER TABLE test_change CHANGE c TYPE INT COMMENT ''
ALTER TABLE test_change CHANGE c COMMENT ''
-- !query schema
struct<>
-- !query output
@ -145,7 +145,15 @@ c int
-- !query
ALTER TABLE test_change CHANGE a TYPE INT COMMENT 'this is column a'
ALTER TABLE test_change CHANGE a TYPE INT
-- !query schema
struct<>
-- !query output
-- !query
ALTER TABLE test_change CHANGE a COMMENT 'this is column a'
-- !query schema
struct<>
-- !query output
@ -181,6 +189,24 @@ b string #*02?`
c int
-- !query
ALTER TABLE test_change CHANGE A COMMENT 'case insensitivity'
-- !query schema
struct<>
-- !query output
-- !query
DESC test_change
-- !query schema
struct<col_name:string,data_type:string,comment:string>
-- !query output
a int case insensitivity
b string #*02?`
c int
-- !query
CREATE TEMPORARY VIEW temp_view(a, b) AS SELECT 1, "one"
-- !query schema
@ -190,7 +216,7 @@ struct<>
-- !query
ALTER TABLE temp_view CHANGE a TYPE INT COMMENT 'this is column a'
ALTER TABLE temp_view CHANGE a TYPE INT
-- !query schema
struct<>
-- !query output
@ -207,7 +233,7 @@ struct<>
-- !query
ALTER TABLE global_temp.global_temp_view CHANGE a TYPE INT COMMENT 'this is column a'
ALTER TABLE global_temp.global_temp_view CHANGE a TYPE INT
-- !query schema
struct<>
-- !query output

View file

@ -651,19 +651,6 @@ trait AlterTableTests extends SharedSparkSession {
}
}
test("AlterTable: update column type and comment") {
val t = s"${catalogAndNamespace}table_name"
withTable(t) {
sql(s"CREATE TABLE $t (id int) USING $v2Format")
sql(s"ALTER TABLE $t ALTER COLUMN id TYPE bigint COMMENT 'doc'")
val table = getTableMetadata(t)
assert(table.name === fullTableName(t))
assert(table.schema === StructType(Seq(StructField("id", LongType).withComment("doc"))))
}
}
test("AlterTable: update nested column comment") {
val t = s"${catalogAndNamespace}table_name"
withTable(t) {

View file

@ -188,7 +188,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession {
withTable("t") {
sql("CREATE TABLE t(i INT) USING parquet")
val e = intercept[AnalysisException] {
sql("ALTER TABLE t ALTER COLUMN i TYPE INT FIRST")
sql("ALTER TABLE t ALTER COLUMN i FIRST")
}
assert(e.message.contains("ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables"))
}
@ -1786,7 +1786,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
column.map(_.metadata).getOrElse(Metadata.empty)
}
// Ensure that change column will preserve other metadata fields.
sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 TYPE INT COMMENT 'this is col1'")
sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 TYPE INT")
sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 COMMENT 'this is col1'")
assert(getMetadata("col1").getString("key") == "value")
assert(getMetadata("col1").getString("comment") == "this is col1")
}

View file

@ -1013,27 +1013,29 @@ class PlanResolutionSuite extends AnalysisTest {
Seq("v1Table" -> true, "v2Table" -> false, "testcat.tab" -> false).foreach {
case (tblName, useV1Command) =>
val sql1 = s"ALTER TABLE $tblName ALTER COLUMN i TYPE bigint"
val sql2 = s"ALTER TABLE $tblName ALTER COLUMN i TYPE bigint COMMENT 'new comment'"
val sql3 = s"ALTER TABLE $tblName ALTER COLUMN i COMMENT 'new comment'"
val sql2 = s"ALTER TABLE $tblName ALTER COLUMN i COMMENT 'new comment'"
val parsed1 = parseAndResolve(sql1)
val parsed2 = parseAndResolve(sql2)
val tableIdent = TableIdentifier(tblName, None)
if (useV1Command) {
val oldColumn = StructField("i", IntegerType)
val newColumn = StructField("i", LongType)
val expected1 = AlterTableChangeColumnCommand(
tableIdent, "i", newColumn)
val expected2 = AlterTableChangeColumnCommand(
tableIdent, "i", newColumn.withComment("new comment"))
tableIdent, "i", oldColumn.withComment("new comment"))
comparePlans(parsed1, expected1)
comparePlans(parsed2, expected2)
val sql3 = s"ALTER TABLE $tblName ALTER COLUMN j COMMENT 'new comment'"
val e1 = intercept[AnalysisException] {
parseAndResolve(sql3)
}
assert(e1.getMessage.contains("ALTER COLUMN with v1 tables must specify new data type"))
assert(e1.getMessage.contains(
"ALTER COLUMN cannot find column j in v1 table. Available: i, s"))
val sql4 = s"ALTER TABLE $tblName ALTER COLUMN a.b.c TYPE bigint"
val e2 = intercept[AnalysisException] {
@ -1051,8 +1053,6 @@ class PlanResolutionSuite extends AnalysisTest {
val parsed5 = parseAndResolve(sql5)
comparePlans(parsed5, expected5)
} else {
val parsed3 = parseAndResolve(sql3)
parsed1 match {
case AlterTable(_, _, _: DataSourceV2Relation, changes) =>
assert(changes == Seq(
@ -1061,14 +1061,6 @@ class PlanResolutionSuite extends AnalysisTest {
}
parsed2 match {
case AlterTable(_, _, _: DataSourceV2Relation, changes) =>
assert(changes == Seq(
TableChange.updateColumnType(Array("i"), LongType),
TableChange.updateColumnComment(Array("i"), "new comment")))
case _ => fail("expect AlterTable")
}
parsed3 match {
case AlterTable(_, _, _: DataSourceV2Relation, changes) =>
assert(changes == Seq(
TableChange.updateColumnComment(Array("i"), "new comment")))
@ -1078,6 +1070,37 @@ class PlanResolutionSuite extends AnalysisTest {
}
}
test("alter table: alter column action is not specified") {
val e = intercept[AnalysisException] {
parseAndResolve("ALTER TABLE v1Table ALTER COLUMN i")
}
assert(e.getMessage.contains(
"ALTER TABLE table ALTER COLUMN requires a TYPE, a SET/DROP, a COMMENT, or a FIRST/AFTER"))
}
test("alter table: alter column case sensitivity for v1 table") {
val tblName = "v1Table"
Seq(true, false).foreach { caseSensitive =>
withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
val sql = s"ALTER TABLE $tblName ALTER COLUMN I COMMENT 'new comment'"
if (caseSensitive) {
val e = intercept[AnalysisException] {
parseAndResolve(sql)
}
assert(e.getMessage.contains(
"ALTER COLUMN cannot find column I in v1 table. Available: i, s"))
} else {
val actual = parseAndResolve(sql)
val expected = AlterTableChangeColumnCommand(
TableIdentifier(tblName, None),
"I",
StructField("I", IntegerType).withComment("new comment"))
comparePlans(actual, expected)
}
}
}
}
test("alter table: hive style change column") {
Seq("v2Table", "testcat.tab").foreach { tblName =>
parseAndResolve(s"ALTER TABLE $tblName CHANGE COLUMN i i int COMMENT 'an index'") match {