[SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of quoteIfNeeded

### What changes were proposed in this pull request?
There are two implementations of `quoteIfNeeded`: one in `org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quote` and the other in `OrcFiltersBase.quoteAttributeNameIfNeeded`. This PR consolidates them into one.
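
For reference, the surviving implementation (the former `CatalogV2Implicits.quote`, renamed to `quoteIfNeeded` in the diff below) only backtick-quotes a name part when it contains a dot or a backtick, escaping embedded backticks by doubling them. A minimal sketch with illustrative inputs:

```scala
// Quote a single name part only when needed (dot or backtick present),
// escaping embedded backticks by doubling them.
def quoteIfNeeded(part: String): String = {
  if (part.contains(".") || part.contains("`")) {
    s"`${part.replace("`", "``")}`"
  } else {
    part
  }
}

quoteIfNeeded("col")    // "col"      -- plain name, left unquoted
quoteIfNeeded("a.b")    // "`a.b`"    -- dot forces quoting
quoteIfNeeded("a`b")    // "`a``b`"   -- backtick escaped by doubling
```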

### Why are the changes needed?
Simplify the codebase.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
Existing UTs.

Closes #27814 from dbtsai/SPARK-31058.

Authored-by: DB Tsai <d_tsai@apple.com>
Signed-off-by: DB Tsai <d_tsai@apple.com>
DB Tsai 2020-03-06 00:13:57 +00:00
parent 8d5ef2f766
commit fe126a6a05
7 changed files with 34 additions and 45 deletions


@@ -55,7 +55,7 @@ class IdentifierImpl implements Identifier {
   @Override
   public String toString() {
     return Stream.concat(Stream.of(namespace), Stream.of(name))
-        .map(CatalogV2Implicits::quote)
+        .map(CatalogV2Implicits::quoteIfNeeded)
         .collect(Collectors.joining("."));
   }


@@ -84,15 +84,15 @@ private[sql] object CatalogV2Implicits {
   }
   implicit class NamespaceHelper(namespace: Array[String]) {
-    def quoted: String = namespace.map(quote).mkString(".")
+    def quoted: String = namespace.map(quoteIfNeeded).mkString(".")
   }
   implicit class IdentifierHelper(ident: Identifier) {
     def quoted: String = {
       if (ident.namespace.nonEmpty) {
-        ident.namespace.map(quote).mkString(".") + "." + quote(ident.name)
+        ident.namespace.map(quoteIfNeeded).mkString(".") + "." + quoteIfNeeded(ident.name)
       } else {
-        quote(ident.name)
+        quoteIfNeeded(ident.name)
       }
     }
@@ -122,10 +122,10 @@ private[sql] object CatalogV2Implicits {
         s"$quoted is not a valid TableIdentifier as it has more than 2 name parts.")
     }
-    def quoted: String = parts.map(quote).mkString(".")
+    def quoted: String = parts.map(quoteIfNeeded).mkString(".")
   }
-  def quote(part: String): String = {
+  def quoteIfNeeded(part: String): String = {
     if (part.contains(".") || part.contains("`")) {
       s"`${part.replace("`", "``")}`"
     } else {


@@ -24,6 +24,7 @@ import scala.collection.mutable
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform}
 import org.apache.spark.sql.types.StructType
@@ -35,20 +36,12 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table {
     def quoted: String = {
       identifier.database match {
         case Some(db) =>
-          Seq(db, identifier.table).map(quote).mkString(".")
+          Seq(db, identifier.table).map(quoteIfNeeded).mkString(".")
         case _ =>
-          quote(identifier.table)
+          quoteIfNeeded(identifier.table)
       }
     }
-    private def quote(part: String): String = {
-      if (part.contains(".") || part.contains("`")) {
-        s"`${part.replace("`", "``")}`"
-      } else {
-        part
-      }
-    }
   }
   def catalogTable: CatalogTable = v1Table


@@ -118,7 +118,7 @@ class ResolveSessionCatalog(
           .map(_._2.dataType)
           .getOrElse {
             throw new AnalysisException(
-              s"ALTER COLUMN cannot find column ${quote(colName)} in v1 table. " +
+              s"ALTER COLUMN cannot find column ${quoteIfNeeded(colName)} in v1 table. " +
                 s"Available: ${v1Table.schema.fieldNames.mkString(", ")}")
           }
       }


@@ -36,16 +36,6 @@ trait OrcFiltersBase {
     }
   }
-  // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
-  // in order to distinguish predicate pushdown for nested columns.
-  protected[sql] def quoteAttributeNameIfNeeded(name: String) : String = {
-    if (!name.contains("`") && name.contains(".")) {
-      s"`$name`"
-    } else {
-      name
-    }
-  }
   /**
    * Return true if this is a searchable type in ORC.
    * Both CharType and VarcharType are cleaned at AstBuilder.
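
For the dotted column names that the removed ORC comment describes, the deleted ORC-specific helper and the shared `quoteIfNeeded` agree; the only behavioral difference is that the shared helper also quotes (and escapes) names containing backticks. A quick comparison sketch, reconstructed from the two implementations shown in this commit:

```scala
// Removed ORC-specific helper (shown above, for comparison).
def quoteAttributeNameIfNeeded(name: String): String =
  if (!name.contains("`") && name.contains(".")) s"`$name`" else name

// Shared helper now used everywhere (from CatalogV2Implicits).
def quoteIfNeeded(part: String): String =
  if (part.contains(".") || part.contains("`")) s"`${part.replace("`", "``")}`" else part

quoteAttributeNameIfNeeded("a.b")  // "`a.b`"
quoteIfNeeded("a.b")               // "`a.b`"  -- same result for nested-column pushdown
quoteAttributeNameIfNeeded("col")  // "col"
quoteIfNeeded("col")               // "col"
```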


@@ -24,6 +24,7 @@ import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder
 import org.apache.orc.storage.serde2.io.HiveDecimalWritable
 import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types._
@@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase {
     // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
     // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
     // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+    // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
+    // in order to distinguish predicate pushdown for nested columns.
     expression match {
       case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
       case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
       case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
       case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
      case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
       case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
       case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
       case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
       case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
         Some(builder.startAnd().in(quotedName, getType(attribute),
           castedValues.map(_.asInstanceOf[AnyRef]): _*).end())


@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable
 import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types._
@@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase {
     // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
     // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
     // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+    // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
+    // in order to distinguish predicate pushdown for nested columns.
     expression match {
       case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
       case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
       case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
       case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
       case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
       case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
       case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
       case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
        Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
      case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
         Some(builder.startAnd().in(quotedName, getType(attribute),
           castedValues.map(_.asInstanceOf[AnyRef]): _*).end())