From eca81cc0aedba8b7f94d687ddb06604339b74c5b Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Fri, 27 Aug 2021 10:51:27 +0800
Subject: [PATCH] [SPARK-36457][DOCS][3.2] Review and fix issues in Scala/Java API docs

### What changes were proposed in this pull request?

Compare the 3.2.0 API docs with the latest release, version 3.1.2, and fix the following issues:
- Add missing `Since` annotations for new APIs
- Remove classes/objects that leak into the API docs

### Why are the changes needed?

Improve the API docs.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing unit tests

Closes #33845 from gengliangwang/SPARK-36457-3.2.

Authored-by: Gengliang Wang
Signed-off-by: Gengliang Wang
---
 core/src/main/scala/org/apache/spark/SparkException.scala    | 2 +-
 .../org/apache/spark/io/MutableCheckedOutputStream.scala     | 2 +-
 .../apache/spark/scheduler/MiscellaneousProcessDetails.scala | 3 ++-
 .../scala/org/apache/spark/scheduler/SparkListener.scala     | 1 +
 .../main/scala/org/apache/spark/util/DependencyUtils.scala   | 2 +-
 project/SparkBuild.scala                                     | 1 +
 .../apache/spark/sql/connector/catalog/FunctionCatalog.java  | 4 ++++
 .../apache/spark/sql/connector/catalog/TruncatableTable.java | 2 ++
 .../sql/connector/catalog/functions/AggregateFunction.java   | 4 ++++
 .../spark/sql/connector/catalog/functions/BoundFunction.java | 4 ++++
 .../spark/sql/connector/catalog/functions/Function.java      | 5 +++++
 .../sql/connector/catalog/functions/ScalarFunction.java      | 4 ++++
 .../sql/connector/catalog/functions/UnboundFunction.java     | 4 ++++
 .../sql/connector/read/streaming/ReportsSourceMetrics.java   | 2 ++
 .../org/apache/spark/sql/errors/QueryCompilationErrors.scala | 2 +-
 .../scala/org/apache/spark/sql/types/UDTRegistration.scala   | 3 ++-
 .../scala/org/apache/spark/sql/types/UserDefinedType.scala   | 3 ++-
 .../java/org/apache/spark/sql/connector/write/V1Write.java   | 2 ++
 18 files changed, 43 insertions(+), 7 deletions(-)
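All of the `Since` fixes below follow one pattern, sketched here with a hypothetical class: `org.apache.spark.annotation.Since` records the release that first shipped a developer-facing API, and the Scala/Java doc generators surface that version in the published docs.

```scala
import org.apache.spark.annotation.{DeveloperApi, Since}

// `MyProcessInfo` is a hypothetical example, not part of this patch:
// annotating a developer-facing API with @Since makes the generated
// API docs record the version that introduced it.
@DeveloperApi
@Since("3.2.0")
class MyProcessInfo(val hostPort: String, val cores: Int) extends Serializable
```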
diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala
index 4137003e8f..6ba425fe90 100644
--- a/core/src/main/scala/org/apache/spark/SparkException.scala
+++ b/core/src/main/scala/org/apache/spark/SparkException.scala
@@ -72,7 +72,7 @@ private[spark] class SparkUpgradeException(version: String, message: String, cau
 /**
  * Arithmetic exception thrown from Spark with an error class.
  */
-class SparkArithmeticException(errorClass: String, messageParameters: Array[String])
+private[spark] class SparkArithmeticException(errorClass: String, messageParameters: Array[String])
   extends ArithmeticException(SparkThrowableHelper.getMessage(errorClass, messageParameters))
   with SparkThrowable {
diff --git a/core/src/main/scala/org/apache/spark/io/MutableCheckedOutputStream.scala b/core/src/main/scala/org/apache/spark/io/MutableCheckedOutputStream.scala
index 754b4a8772..351493b564 100644
--- a/core/src/main/scala/org/apache/spark/io/MutableCheckedOutputStream.scala
+++ b/core/src/main/scala/org/apache/spark/io/MutableCheckedOutputStream.scala
@@ -24,7 +24,7 @@ import java.util.zip.Checksum
  * A variant of [[java.util.zip.CheckedOutputStream]] which can
  * change the checksum calculator at runtime.
  */
-class MutableCheckedOutputStream(out: OutputStream) extends OutputStream {
+private[spark] class MutableCheckedOutputStream(out: OutputStream) extends OutputStream {
   private var checksum: Checksum = _

   def setChecksum(c: Checksum): Unit = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/MiscellaneousProcessDetails.scala b/core/src/main/scala/org/apache/spark/scheduler/MiscellaneousProcessDetails.scala
index 0e5fe0ad45..00f6344f06 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/MiscellaneousProcessDetails.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/MiscellaneousProcessDetails.scala
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.scheduler

-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}

 /**
  * :: DeveloperApi ::
@@ -24,6 +24,7 @@
  */
 @DeveloperApi
+@Since("3.2.0")
 class MiscellaneousProcessDetails(
     val hostPort: String,
     val cores: Int,
diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
index d00866edf7..a9d8634794 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
@@ -228,6 +228,7 @@ case class SparkListenerUnschedulableTaskSetRemoved(
 case class SparkListenerBlockUpdated(blockUpdatedInfo: BlockUpdatedInfo) extends SparkListenerEvent

 @DeveloperApi
+@Since("3.2.0")
 case class SparkListenerMiscellaneousProcessAdded(time: Long, processId: String,
     info: MiscellaneousProcessDetails) extends SparkListenerEvent
diff --git a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
index da8ea4f3f0..e0c2337571 100644
--- a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
@@ -29,7 +29,7 @@ import org.apache.spark.deploy.SparkSubmitUtils
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._

-case class IvyProperties(
+private[spark] case class IvyProperties(
     packagesExclusions: String,
     packages: String,
     repositories: String,
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index f64ac27553..8edc1710e8 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -967,6 +967,7 @@ object Unidoc {
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/internal")))
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/hive")))
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/catalog/v2/utils")))
+      .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/errors")))
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hive")))
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/v2/avro")))
       .map(_.filterNot(_.getCanonicalPath.contains("SSLOptions")))
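For context on the `SparkListenerMiscellaneousProcessAdded` event annotated above, a minimal sketch of a listener consuming it. The class name is hypothetical, and the event is assumed to arrive through the generic `onOtherEvent` callback, since `SparkListener` defines no dedicated callback for it:

```scala
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent,
  SparkListenerMiscellaneousProcessAdded}

// Hypothetical listener: pattern-match on the event type inside onOtherEvent.
class MiscProcessLogger extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case e: SparkListenerMiscellaneousProcessAdded =>
      println(s"Process ${e.processId} added on ${e.info.hostPort} " +
        s"with ${e.info.cores} cores at t=${e.time}")
    case _ => // ignore other events
  }
}
```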
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/FunctionCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/FunctionCatalog.java
index 651c9148c4..ce725d1424 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/FunctionCatalog.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/FunctionCatalog.java
@@ -17,13 +17,17 @@

 package org.apache.spark.sql.connector.catalog;

+import org.apache.spark.annotation.Evolving;
 import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException;
 import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
 import org.apache.spark.sql.connector.catalog.functions.UnboundFunction;

 /**
  * Catalog methods for working with Functions.
+ *
+ * @since 3.2.0
  */
+@Evolving
 public interface FunctionCatalog extends CatalogPlugin {

   /**
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TruncatableTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TruncatableTable.java
index a69f384913..4bc2aa6e18 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TruncatableTable.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TruncatableTable.java
@@ -21,6 +21,8 @@ import org.apache.spark.annotation.Evolving;

 /**
  * Represents a table which can be atomically truncated.
+ *
+ * @since 3.2.0
  */
 @Evolving
 public interface TruncatableTable extends Table {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/AggregateFunction.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/AggregateFunction.java
index 4181feafed..6e8ae972c8 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/AggregateFunction.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/AggregateFunction.java
@@ -17,6 +17,7 @@

 package org.apache.spark.sql.connector.catalog.functions;

+import org.apache.spark.annotation.Evolving;
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.types.DataType;

@@ -42,7 +43,10 @@ import java.io.Serializable;
  *
  * @param <S> the JVM type for the aggregation's intermediate state; must be {@link Serializable}
  * @param <R> the JVM type of result values
+ *
+ * @since 3.2.0
  */
+@Evolving
 public interface AggregateFunction<S extends Serializable, R> extends BoundFunction {

   /**
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/BoundFunction.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/BoundFunction.java
index c53f94a168..53a1beb9c4 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/BoundFunction.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/BoundFunction.java
@@ -17,6 +17,7 @@

 package org.apache.spark.sql.connector.catalog.functions;

+import org.apache.spark.annotation.Evolving;
 import org.apache.spark.sql.types.DataType;
 import org.apache.spark.sql.types.IntegerType;
 import org.apache.spark.sql.types.StructType;

@@ -25,7 +26,10 @@ import java.util.UUID;

 /**
  * Represents a function that is bound to an input type.
+ *
+ * @since 3.2.0
  */
+@Evolving
 public interface BoundFunction extends Function {

   /**
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/Function.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/Function.java
index b7f14eb271..270e80ac94 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/Function.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/Function.java
@@ -17,11 +17,16 @@

 package org.apache.spark.sql.connector.catalog.functions;

+import org.apache.spark.annotation.Evolving;
+
 import java.io.Serializable;

 /**
  * Base class for user-defined functions.
+ *
+ * @since 3.2.0
  */
+@Evolving
 public interface Function extends Serializable {

   /**
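To illustrate the `AggregateFunction` contract annotated above, a minimal sketch in Scala, assuming the interface's four methods (`newAggregationState`, `update`, `merge`, `produceResult`); the class name is hypothetical:

```scala
import java.lang.{Long => JLong}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.catalog.functions.AggregateFunction
import org.apache.spark.sql.types.{DataType, LongType}

// Hypothetical aggregate summing one LONG column; the intermediate state
// (java.lang.Long) satisfies the Serializable bound on S.
class LongSum extends AggregateFunction[JLong, JLong] {
  override def name(): String = "long_sum"
  override def inputTypes(): Array[DataType] = Array(LongType)
  override def resultType(): DataType = LongType
  override def newAggregationState(): JLong = 0L
  override def update(state: JLong, input: InternalRow): JLong =
    if (input.isNullAt(0)) state else state + input.getLong(0)
  override def merge(left: JLong, right: JLong): JLong = left + right
  override def produceResult(state: JLong): JLong = state
}
```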
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/ScalarFunction.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/ScalarFunction.java
index eca9e1f7b9..8f360344d1 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/ScalarFunction.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/ScalarFunction.java
@@ -17,6 +17,7 @@

 package org.apache.spark.sql.connector.catalog.functions;

+import org.apache.spark.annotation.Evolving;
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.types.DataType;

@@ -133,7 +134,10 @@ import org.apache.spark.sql.types.DataType;
  *
  * @param <R> the JVM type of result values, MUST be consistent with the {@link DataType}
  *            returned via {@link #resultType()}, according to the mapping above.
+ *
+ * @since 3.2.0
  */
+@Evolving
 public interface ScalarFunction<R> extends BoundFunction {
   String MAGIC_METHOD_NAME = "invoke";
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/UnboundFunction.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/UnboundFunction.java
index c7dd4c2b88..55417f94a3 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/UnboundFunction.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/functions/UnboundFunction.java
@@ -17,11 +17,15 @@

 package org.apache.spark.sql.connector.catalog.functions;

+import org.apache.spark.annotation.Evolving;
 import org.apache.spark.sql.types.StructType;

 /**
  * Represents a user-defined function that is not bound to input types.
+ *
+ * @since 3.2.0
  */
+@Evolving
 public interface UnboundFunction extends Function {

   /**
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReportsSourceMetrics.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReportsSourceMetrics.java
index 705cd1ad04..3bb15e5b46 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReportsSourceMetrics.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReportsSourceMetrics.java
@@ -25,6 +25,8 @@ import org.apache.spark.annotation.Evolving;

 /**
  * A mix-in interface for {@link SparkDataStream} streaming sources to signal that they can report
  * metrics.
+ *
+ * @since 3.2.0
  */
 @Evolving
 public interface ReportsSourceMetrics extends SparkDataStream {
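Similarly for `ScalarFunction`: the hunk above shows its `MAGIC_METHOD_NAME = "invoke"` constant, the optional non-boxing entry point. A minimal sketch using the row-based `produceResult` path instead; the class name is hypothetical:

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.catalog.functions.ScalarFunction
import org.apache.spark.sql.types.{DataType, IntegerType}

// Hypothetical scalar function adding two INT columns. A source could
// instead define the magic method `def invoke(left: Int, right: Int): Int`
// to avoid boxing, per the MAGIC_METHOD_NAME contract.
class IntAdd extends ScalarFunction[Integer] {
  override def name(): String = "int_add"
  override def inputTypes(): Array[DataType] = Array(IntegerType, IntegerType)
  override def resultType(): DataType = IntegerType
  override def produceResult(input: InternalRow): Integer =
    input.getInt(0) + input.getInt(1)
}
```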
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index eedf03876a..e7af006ad7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -46,7 +46,7 @@ import org.apache.spark.sql.types._
 * As commands are executed eagerly, this also includes errors thrown during the execution of
 * commands, which users can see immediately.
 */
-private[spark] object QueryCompilationErrors {
+object QueryCompilationErrors {

   def groupingIDMismatchError(groupingID: GroupingID, groupByExprs: Seq[Expression]): Throwable = {
     new AnalysisException(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala
index eb28b06459..a6cd77b99c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.types

 import scala.collection.mutable

-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.util.Utils

@@ -31,6 +31,7 @@ import org.apache.spark.util.Utils
  * alternative approach to register UDTs for user classes.
 */
 @DeveloperApi
+@Since("3.2.0")
 object UDTRegistration extends Serializable with Logging {

   /** The mapping between the Class between UserDefinedType and user classes. */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
index 7a26809415..5cbd876b31 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
@@ -22,7 +22,7 @@ import java.util.Objects
 import org.json4s.JsonAST.JValue
 import org.json4s.JsonDSL._

-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}

 /**
  * The data type for User Defined Types (UDTs).
@@ -38,6 +38,7 @@ import org.apache.spark.annotation.DeveloperApi
  * The conversion via `deserialize` occurs when reading from a `DataFrame`.
 */
 @DeveloperApi
+@Since("3.2.0")
 abstract class UserDefinedType[UserType >: Null] extends DataType with Serializable {

   /** Underlying storage type for this UDT */
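To illustrate the `UDTRegistration`/`UserDefinedType` pair annotated above, a minimal sketch of the "alternative approach to register UDTs for user classes" that the scaladoc mentions; all names are hypothetical:

```scala
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
import org.apache.spark.sql.types.{ArrayType, DataType, DoubleType,
  UDTRegistration, UserDefinedType}

// Hypothetical user class plus its UDT. UDTRegistration ties them together
// by fully qualified class name, without annotating the user class itself.
class MyPoint(val x: Double, val y: Double)

class MyPointUDT extends UserDefinedType[MyPoint] {
  override def sqlType: DataType = ArrayType(DoubleType, containsNull = false)
  override def serialize(p: MyPoint): Any = new GenericArrayData(Array(p.x, p.y))
  override def deserialize(datum: Any): MyPoint = datum match {
    case a: ArrayData => new MyPoint(a.getDouble(0), a.getDouble(1))
  }
  override def userClass: Class[MyPoint] = classOf[MyPoint]
}

object MyPointUDT {
  def register(): Unit =
    UDTRegistration.register(classOf[MyPoint].getName, classOf[MyPointUDT].getName)
}
```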
diff --git a/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java b/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java
index a299967ee8..4a27c3e02a 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java
@@ -26,6 +26,8 @@ import org.apache.spark.sql.sources.InsertableRelation;
  * <p>
  * Tables that have {@link TableCapability#V1_BATCH_WRITE} in the list of their capabilities
  * must build {@link V1Write}.
+ *
+ * @since 3.2.0
  */
 @Unstable
 public interface V1Write extends Write {
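Finally, a minimal sketch of the `V1Write` contract annotated above, assuming its single `toInsertableRelation` method; the class name is hypothetical:

```scala
import org.apache.spark.sql.connector.write.V1Write
import org.apache.spark.sql.sources.InsertableRelation

// Hypothetical write: a table that reports V1_BATCH_WRITE only has to hand
// Spark the legacy InsertableRelation that performs the actual insert.
class MyV1Write(relation: InsertableRelation) extends V1Write {
  override def toInsertableRelation: InsertableRelation = relation
}
```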