[SPARK-27964][SQL] Move v2 catalog update methods to CatalogV2Util
## What changes were proposed in this pull request? Move methods that implement v2 catalog operations to CatalogV2Util so they can be used in #24768. ## How was this patch tested? Behavior is validated by existing tests. Closes #24813 from rdblue/SPARK-27964-add-catalog-v2-util. Authored-by: Ryan Blue <blue@apache.org> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
20e8843350
commit
d1371a2dad
|
@ -823,6 +823,7 @@ object Unidoc {
|
||||||
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/execution")))
|
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/execution")))
|
||||||
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/internal")))
|
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/internal")))
|
||||||
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/hive/test")))
|
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/hive/test")))
|
||||||
|
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/catalog/v2/utils")))
|
||||||
}
|
}
|
||||||
|
|
||||||
private def ignoreClasspaths(classpaths: Seq[Classpath]): Seq[Classpath] = {
|
private def ignoreClasspaths(classpaths: Seq[Classpath]): Seq[Classpath] = {
|
||||||
|
|
|
@ -0,0 +1,152 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.sql.catalog.v2.utils
|
||||||
|
|
||||||
|
import java.util
|
||||||
|
import java.util.Collections
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
|
import org.apache.spark.sql.catalog.v2.TableChange
|
||||||
|
import org.apache.spark.sql.catalog.v2.TableChange.{AddColumn, DeleteColumn, RemoveProperty, RenameColumn, SetProperty, UpdateColumnComment, UpdateColumnType}
|
||||||
|
import org.apache.spark.sql.types.{StructField, StructType}
|
||||||
|
|
||||||
|
object CatalogV2Util {
|
||||||
|
/**
|
||||||
|
* Apply properties changes to a map and return the result.
|
||||||
|
*/
|
||||||
|
def applyPropertiesChanges(
|
||||||
|
properties: Map[String, String],
|
||||||
|
changes: Seq[TableChange]): Map[String, String] = {
|
||||||
|
applyPropertiesChanges(properties.asJava, changes).asScala.toMap
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply properties changes to a Java map and return the result.
|
||||||
|
*/
|
||||||
|
def applyPropertiesChanges(
|
||||||
|
properties: util.Map[String, String],
|
||||||
|
changes: Seq[TableChange]): util.Map[String, String] = {
|
||||||
|
val newProperties = new util.HashMap[String, String](properties)
|
||||||
|
|
||||||
|
changes.foreach {
|
||||||
|
case set: SetProperty =>
|
||||||
|
newProperties.put(set.property, set.value)
|
||||||
|
|
||||||
|
case unset: RemoveProperty =>
|
||||||
|
newProperties.remove(unset.property)
|
||||||
|
|
||||||
|
case _ =>
|
||||||
|
// ignore non-property changes
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.unmodifiableMap(newProperties)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply schema changes to a schema and return the result.
|
||||||
|
*/
|
||||||
|
def applySchemaChanges(schema: StructType, changes: Seq[TableChange]): StructType = {
|
||||||
|
changes.foldLeft(schema) { (schema, change) =>
|
||||||
|
change match {
|
||||||
|
case add: AddColumn =>
|
||||||
|
add.fieldNames match {
|
||||||
|
case Array(name) =>
|
||||||
|
val newField = StructField(name, add.dataType, nullable = add.isNullable)
|
||||||
|
Option(add.comment) match {
|
||||||
|
case Some(comment) =>
|
||||||
|
schema.add(newField.withComment(comment))
|
||||||
|
case _ =>
|
||||||
|
schema.add(newField)
|
||||||
|
}
|
||||||
|
|
||||||
|
case names =>
|
||||||
|
replace(schema, names.init, parent => parent.dataType match {
|
||||||
|
case parentType: StructType =>
|
||||||
|
val field = StructField(names.last, add.dataType, nullable = add.isNullable)
|
||||||
|
val newParentType = Option(add.comment) match {
|
||||||
|
case Some(comment) =>
|
||||||
|
parentType.add(field.withComment(comment))
|
||||||
|
case None =>
|
||||||
|
parentType.add(field)
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(StructField(parent.name, newParentType, parent.nullable, parent.metadata))
|
||||||
|
|
||||||
|
case _ =>
|
||||||
|
throw new IllegalArgumentException(s"Not a struct: ${names.init.last}")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
case rename: RenameColumn =>
|
||||||
|
replace(schema, rename.fieldNames, field =>
|
||||||
|
Some(StructField(rename.newName, field.dataType, field.nullable, field.metadata)))
|
||||||
|
|
||||||
|
case update: UpdateColumnType =>
|
||||||
|
replace(schema, update.fieldNames, field => {
|
||||||
|
if (!update.isNullable && field.nullable) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
s"Cannot change optional column to required: $field.name")
|
||||||
|
}
|
||||||
|
Some(StructField(field.name, update.newDataType, update.isNullable, field.metadata))
|
||||||
|
})
|
||||||
|
|
||||||
|
case update: UpdateColumnComment =>
|
||||||
|
replace(schema, update.fieldNames, field =>
|
||||||
|
Some(field.withComment(update.newComment)))
|
||||||
|
|
||||||
|
case delete: DeleteColumn =>
|
||||||
|
replace(schema, delete.fieldNames, _ => None)
|
||||||
|
|
||||||
|
case _ =>
|
||||||
|
// ignore non-schema changes
|
||||||
|
schema
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private def replace(
|
||||||
|
struct: StructType,
|
||||||
|
fieldNames: Seq[String],
|
||||||
|
update: StructField => Option[StructField]): StructType = {
|
||||||
|
|
||||||
|
val pos = struct.getFieldIndex(fieldNames.head)
|
||||||
|
.getOrElse(throw new IllegalArgumentException(s"Cannot find field: ${fieldNames.head}"))
|
||||||
|
val field = struct.fields(pos)
|
||||||
|
val replacement: Option[StructField] = if (fieldNames.tail.isEmpty) {
|
||||||
|
update(field)
|
||||||
|
} else {
|
||||||
|
field.dataType match {
|
||||||
|
case nestedStruct: StructType =>
|
||||||
|
val updatedType: StructType = replace(nestedStruct, fieldNames.tail, update)
|
||||||
|
Some(StructField(field.name, updatedType, field.nullable, field.metadata))
|
||||||
|
case _ =>
|
||||||
|
throw new IllegalArgumentException(s"Not a struct: ${fieldNames.head}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
val newFields = struct.fields.zipWithIndex.flatMap {
|
||||||
|
case (_, index) if pos == index =>
|
||||||
|
replacement
|
||||||
|
case (other, _) =>
|
||||||
|
Some(other)
|
||||||
|
}
|
||||||
|
|
||||||
|
new StructType(newFields)
|
||||||
|
}
|
||||||
|
}
|
|
@ -18,16 +18,15 @@
|
||||||
package org.apache.spark.sql.catalog.v2
|
package org.apache.spark.sql.catalog.v2
|
||||||
|
|
||||||
import java.util
|
import java.util
|
||||||
import java.util.Collections
|
|
||||||
import java.util.concurrent.ConcurrentHashMap
|
import java.util.concurrent.ConcurrentHashMap
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
import org.apache.spark.sql.catalog.v2.TableChange.{AddColumn, DeleteColumn, RemoveProperty, RenameColumn, SetProperty, UpdateColumnComment, UpdateColumnType}
|
|
||||||
import org.apache.spark.sql.catalog.v2.expressions.Transform
|
import org.apache.spark.sql.catalog.v2.expressions.Transform
|
||||||
|
import org.apache.spark.sql.catalog.v2.utils.CatalogV2Util
|
||||||
import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException}
|
import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException}
|
||||||
import org.apache.spark.sql.sources.v2.{Table, TableCapability}
|
import org.apache.spark.sql.sources.v2.{Table, TableCapability}
|
||||||
import org.apache.spark.sql.types.{StructField, StructType}
|
import org.apache.spark.sql.types.StructType
|
||||||
import org.apache.spark.sql.util.CaseInsensitiveStringMap
|
import org.apache.spark.sql.util.CaseInsensitiveStringMap
|
||||||
|
|
||||||
class TestTableCatalog extends TableCatalog {
|
class TestTableCatalog extends TableCatalog {
|
||||||
|
@ -79,8 +78,8 @@ class TestTableCatalog extends TableCatalog {
|
||||||
|
|
||||||
override def alterTable(ident: Identifier, changes: TableChange*): Table = {
|
override def alterTable(ident: Identifier, changes: TableChange*): Table = {
|
||||||
val table = loadTable(ident)
|
val table = loadTable(ident)
|
||||||
val properties = TestTableCatalog.applyPropertiesChanges(table.properties, changes)
|
val properties = CatalogV2Util.applyPropertiesChanges(table.properties, changes)
|
||||||
val schema = TestTableCatalog.applySchemaChanges(table.schema, changes)
|
val schema = CatalogV2Util.applySchemaChanges(table.schema, changes)
|
||||||
val newTable = InMemoryTable(table.name, schema, properties)
|
val newTable = InMemoryTable(table.name, schema, properties)
|
||||||
|
|
||||||
tables.put(ident, newTable)
|
tables.put(ident, newTable)
|
||||||
|
@ -91,122 +90,6 @@ class TestTableCatalog extends TableCatalog {
|
||||||
override def dropTable(ident: Identifier): Boolean = Option(tables.remove(ident)).isDefined
|
override def dropTable(ident: Identifier): Boolean = Option(tables.remove(ident)).isDefined
|
||||||
}
|
}
|
||||||
|
|
||||||
object TestTableCatalog {
|
|
||||||
/**
|
|
||||||
* Apply properties changes to a map and return the result.
|
|
||||||
*/
|
|
||||||
def applyPropertiesChanges(
|
|
||||||
properties: util.Map[String, String],
|
|
||||||
changes: Seq[TableChange]): util.Map[String, String] = {
|
|
||||||
val newProperties = new util.HashMap[String, String](properties)
|
|
||||||
|
|
||||||
changes.foreach {
|
|
||||||
case set: SetProperty =>
|
|
||||||
newProperties.put(set.property, set.value)
|
|
||||||
|
|
||||||
case unset: RemoveProperty =>
|
|
||||||
newProperties.remove(unset.property)
|
|
||||||
|
|
||||||
case _ =>
|
|
||||||
// ignore non-property changes
|
|
||||||
}
|
|
||||||
|
|
||||||
Collections.unmodifiableMap(newProperties)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Apply schema changes to a schema and return the result.
|
|
||||||
*/
|
|
||||||
def applySchemaChanges(schema: StructType, changes: Seq[TableChange]): StructType = {
|
|
||||||
changes.foldLeft(schema) { (schema, change) =>
|
|
||||||
change match {
|
|
||||||
case add: AddColumn =>
|
|
||||||
add.fieldNames match {
|
|
||||||
case Array(name) =>
|
|
||||||
val newField = StructField(name, add.dataType, nullable = add.isNullable)
|
|
||||||
Option(add.comment) match {
|
|
||||||
case Some(comment) =>
|
|
||||||
schema.add(newField.withComment(comment))
|
|
||||||
case _ =>
|
|
||||||
schema.add(newField)
|
|
||||||
}
|
|
||||||
|
|
||||||
case names =>
|
|
||||||
replace(schema, names.init, parent => parent.dataType match {
|
|
||||||
case parentType: StructType =>
|
|
||||||
val field = StructField(names.last, add.dataType, nullable = add.isNullable)
|
|
||||||
val newParentType = Option(add.comment) match {
|
|
||||||
case Some(comment) =>
|
|
||||||
parentType.add(field.withComment(comment))
|
|
||||||
case None =>
|
|
||||||
parentType.add(field)
|
|
||||||
}
|
|
||||||
|
|
||||||
Some(StructField(parent.name, newParentType, parent.nullable, parent.metadata))
|
|
||||||
|
|
||||||
case _ =>
|
|
||||||
throw new IllegalArgumentException(s"Not a struct: ${names.init.last}")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
case rename: RenameColumn =>
|
|
||||||
replace(schema, rename.fieldNames, field =>
|
|
||||||
Some(StructField(rename.newName, field.dataType, field.nullable, field.metadata)))
|
|
||||||
|
|
||||||
case update: UpdateColumnType =>
|
|
||||||
replace(schema, update.fieldNames, field => {
|
|
||||||
if (!update.isNullable && field.nullable) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
s"Cannot change optional column to required: $field.name")
|
|
||||||
}
|
|
||||||
Some(StructField(field.name, update.newDataType, update.isNullable, field.metadata))
|
|
||||||
})
|
|
||||||
|
|
||||||
case update: UpdateColumnComment =>
|
|
||||||
replace(schema, update.fieldNames, field =>
|
|
||||||
Some(field.withComment(update.newComment)))
|
|
||||||
|
|
||||||
case delete: DeleteColumn =>
|
|
||||||
replace(schema, delete.fieldNames, _ => None)
|
|
||||||
|
|
||||||
case _ =>
|
|
||||||
// ignore non-schema changes
|
|
||||||
schema
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private def replace(
|
|
||||||
struct: StructType,
|
|
||||||
path: Seq[String],
|
|
||||||
update: StructField => Option[StructField]): StructType = {
|
|
||||||
|
|
||||||
val pos = struct.getFieldIndex(path.head)
|
|
||||||
.getOrElse(throw new IllegalArgumentException(s"Cannot find field: ${path.head}"))
|
|
||||||
val field = struct.fields(pos)
|
|
||||||
val replacement: Option[StructField] = if (path.tail.isEmpty) {
|
|
||||||
update(field)
|
|
||||||
} else {
|
|
||||||
field.dataType match {
|
|
||||||
case nestedStruct: StructType =>
|
|
||||||
val updatedType: StructType = replace(nestedStruct, path.tail, update)
|
|
||||||
Some(StructField(field.name, updatedType, field.nullable, field.metadata))
|
|
||||||
case _ =>
|
|
||||||
throw new IllegalArgumentException(s"Not a struct: ${path.head}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val newFields = struct.fields.zipWithIndex.flatMap {
|
|
||||||
case (_, index) if pos == index =>
|
|
||||||
replacement
|
|
||||||
case (other, _) =>
|
|
||||||
Some(other)
|
|
||||||
}
|
|
||||||
|
|
||||||
new StructType(newFields)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
case class InMemoryTable(
|
case class InMemoryTable(
|
||||||
name: String,
|
name: String,
|
||||||
schema: StructType,
|
schema: StructType,
|
||||||
|
|
|
@ -23,8 +23,9 @@ import java.util.concurrent.ConcurrentHashMap
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
|
|
||||||
import org.apache.spark.sql.catalog.v2.{CatalogV2Implicits, Identifier, TableCatalog, TableChange, TestTableCatalog}
|
import org.apache.spark.sql.catalog.v2.{CatalogV2Implicits, Identifier, TableCatalog, TableChange}
|
||||||
import org.apache.spark.sql.catalog.v2.expressions.Transform
|
import org.apache.spark.sql.catalog.v2.expressions.Transform
|
||||||
|
import org.apache.spark.sql.catalog.v2.utils.CatalogV2Util
|
||||||
import org.apache.spark.sql.catalyst.InternalRow
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException}
|
import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException}
|
||||||
import org.apache.spark.sql.sources.v2.reader.{Batch, InputPartition, PartitionReader, PartitionReaderFactory, Scan, ScanBuilder}
|
import org.apache.spark.sql.sources.v2.reader.{Batch, InputPartition, PartitionReader, PartitionReaderFactory, Scan, ScanBuilder}
|
||||||
|
@ -85,8 +86,8 @@ class TestInMemoryTableCatalog extends TableCatalog {
|
||||||
override def alterTable(ident: Identifier, changes: TableChange*): Table = {
|
override def alterTable(ident: Identifier, changes: TableChange*): Table = {
|
||||||
Option(tables.get(ident)) match {
|
Option(tables.get(ident)) match {
|
||||||
case Some(table) =>
|
case Some(table) =>
|
||||||
val properties = TestTableCatalog.applyPropertiesChanges(table.properties, changes)
|
val properties = CatalogV2Util.applyPropertiesChanges(table.properties, changes)
|
||||||
val schema = TestTableCatalog.applySchemaChanges(table.schema, changes)
|
val schema = CatalogV2Util.applySchemaChanges(table.schema, changes)
|
||||||
val newTable = new InMemoryTable(table.name, schema, properties, table.data)
|
val newTable = new InMemoryTable(table.name, schema, properties, table.data)
|
||||||
|
|
||||||
tables.put(ident, newTable)
|
tables.put(ident, newTable)
|
||||||
|
|
Loading…
Reference in a new issue