diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 12b3ecfcca..98cfe2a33f 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -23,6 +23,8 @@ license: |
 {:toc}
 
 ## Upgrading from Spark SQL 2.4 to 3.0
 
+  - Since Spark 3.0, creating a permanent function with a resource throws `AnalysisException` if the resource does not exist.
+  - Since Spark 3.0, when inserting a value into a table column with a different data type, the type coercion is performed per the ANSI SQL standard. Certain unreasonable type conversions, such as converting `string` to `int` and `double` to `boolean`, are disallowed. A runtime exception is thrown if the value is out of range for the data type of the column. In Spark version 2.4 and earlier, type conversions during table insertion are allowed as long as they are a valid `Cast`. When inserting an out-of-range value into an integral field, the low-order bits of the value are inserted (the same as Java/Scala numeric type casting). For example, if 257 is inserted into a field of byte type, the result is 1. The behavior is controlled by the option `spark.sql.storeAssignmentPolicy`, with a default value of "ANSI". Setting the option to "Legacy" restores the previous behavior.
 
   - In Spark 3.0, the deprecated methods `SQLContext.createExternalTable` and `SparkSession.createExternalTable` have been removed in favor of its replacement, `createTable`.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
index 6fdc7f4a58..13c9a06f78 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
@@ -19,11 +19,13 @@ package org.apache.spark.sql.execution.command
 
 import java.util.Locale
 
+import org.apache.hadoop.fs.Path
+
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.FunctionIdentifier
 import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException}
 import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, FunctionResource}
-import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionInfo}
+import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.util.StringUtils
 import org.apache.spark.sql.types.{StringType, StructField, StructType}
 
@@ -74,6 +76,15 @@ case class CreateFunctionCommand(
   }
 
   override def run(sparkSession: SparkSession): Seq[Row] = {
+    // Checks if the given resources exist before touching the catalog
+    val hadoopConf = sparkSession.sparkContext.hadoopConfiguration
+    val nonExistentResources = resources.filter { r =>
+      val path = new Path(r.uri)
+      !path.getFileSystem(hadoopConf).exists(path)
+    }
+    if (nonExistentResources.nonEmpty) {
+      throw new AnalysisException(s"Resources not found: ${nonExistentResources.mkString(",")}")
+    }
     val catalog = sparkSession.sessionState.catalog
     val func = CatalogFunction(FunctionIdentifier(functionName, databaseName), className, resources)
     if (isTemp) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index cbe99bfde9..cf24372e0e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3370,6 +3370,20 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
       checkAnswer(df5, Array.empty[Row])
     }
   }
+
+  test("SPARK-28670: create function should throw AnalysisException if resource is not found") {
+    Seq(true, false).foreach { isTemporary =>
+      val exp = intercept[AnalysisException] {
+        sql(
+          s"""
+             |CREATE ${if (isTemporary) "TEMPORARY" else ""} FUNCTION udtf_test
+             |AS 'org.apache.spark.sql.hive.execution.UDFTest'
+             |USING JAR '/var/invalid/invalid.jar'
+           """.stripMargin)
+      }
+      assert(exp.getMessage.contains("Resources not found"))
+    }
+  }
 }
 
 case class Foo(bar: Option[String])
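
For context, a minimal sketch (not part of the patch) of how the new check surfaces to users, assuming a local `SparkSession`; the function name `my_udf`, class `com.example.MyUDF`, and jar path `/tmp/does-not-exist.jar` are placeholders:

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object CreateFunctionResourceCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("create-function-resource-check")
      .getOrCreate()

    try {
      // With this change, the resource is validated up front, so the statement
      // fails at CREATE time instead of when the function is first resolved.
      spark.sql(
        """
          |CREATE FUNCTION my_udf AS 'com.example.MyUDF'
          |USING JAR '/tmp/does-not-exist.jar'
        """.stripMargin)
    } catch {
      case e: AnalysisException =>
        // Expected message: "Resources not found: ..."
        println(s"CREATE FUNCTION rejected: ${e.getMessage}")
    } finally {
      spark.stop()
    }
  }
}
```

In Spark 2.4 and earlier, the permanent function above would be created successfully and the missing jar would only surface when the function was first used, which is the behavior change described in the migration-guide entry added by this patch.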