[SPARK-7971] Add JavaDoc style deprecation for deprecated DataFrame methods

Scala deprecated annotation actually doesn't show up in JavaDoc. Author: Reynold Xin <rxin@databricks.com> Closes #6523 from rxin/df-deprecated-javadoc and squashes the following commits: 26da2b2 [Reynold Xin] [SPARK-7971] Add JavaDoc style deprecation for deprecated DataFrame methods.
2015-05-30 19:51:53 -07:00 · 2015-05-30 19:51:53 -07:00 · c63e1a742b
parent 14b314dc2c
commit c63e1a742b
3 changed files with 70 additions and 12 deletions
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@ -165,6 +165,9 @@ object DataType {

  def fromJson(json: String): DataType = parseDataType(parse(json))

+  /**
+   * @deprecated As of 1.2.0, replaced by `DataType.fromJson()`.
+   */
  @deprecated("Use DataType.fromJson instead", "1.2.0")
  def fromCaseClassString(string: String): DataType = CaseClassStringParser(string)

--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@ -57,14 +57,11 @@ private[sql] object DataFrame {
 * :: Experimental ::
 * A distributed collection of data organized into named columns.
 *
- * A [[DataFrame]] is equivalent to a relational table in Spark SQL. There are multiple ways
- * to create a [[DataFrame]]:
+ * A [[DataFrame]] is equivalent to a relational table in Spark SQL. The following example creates
+ * a [[DataFrame]] by pointing Spark SQL to a Parquet data set.
 * {{{
- *   // Create a DataFrame from Parquet files
- *   val people = sqlContext.parquetFile("...")
- *
- *   // Create a DataFrame from data sources
- *   val df = sqlContext.load("...", "json")
+ *   val people = sqlContext.read.parquet("...")  // in Scala
+ *   DataFrame people = sqlContext.read().parquet("...")  // in Java
 * }}}
 *
 * Once created, it can be manipulated using the various domain-specific-language (DSL) functions
@ -86,8 +83,8 @@ private[sql] object DataFrame {
 * A more concrete example in Scala:
 * {{{
 *   // To create DataFrame using SQLContext
- *   val people = sqlContext.parquetFile("...")
- *   val department = sqlContext.parquetFile("...")
+ *   val people = sqlContext.read.parquet("...")
+ *   val department = sqlContext.read.parquet("...")
 *
 *   people.filter("age > 30")
 *     .join(department, people("deptId") === department("id"))
@ -98,8 +95,8 @@ private[sql] object DataFrame {
 * and in Java:
 * {{{
 *   // To create DataFrame using SQLContext
- *   DataFrame people = sqlContext.parquetFile("...");
- *   DataFrame department = sqlContext.parquetFile("...");
+ *   DataFrame people = sqlContext.read().parquet("...");
+ *   DataFrame department = sqlContext.read().parquet("...");
 *
 *   people.filter(people.col("age").gt(30))
 *     .join(department, people.col("deptId").equalTo(department.col("id")))
@ -1444,7 +1441,9 @@ class DataFrame private[sql](
  ////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////

-  /** Left here for backward compatibility. */
+  /**
+   * @deprecated As of 1.3.0, replaced by `toDF()`.
+   */
  @deprecated("use toDF", "1.3.0")
  def toSchemaRDD: DataFrame = this

@ -1455,6 +1454,7 @@ class DataFrame private[sql](
   * given name; if you pass `false`, it will throw if the table already
   * exists.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().jdbc()`.
   */
  @deprecated("Use write.jdbc()", "1.4.0")
  def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = {
@ -1473,6 +1473,7 @@ class DataFrame private[sql](
   * the RDD in order via the simple statement
   * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().jdbc()`.
   */
  @deprecated("Use write.jdbc()", "1.4.0")
  def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = {
@ -1485,6 +1486,7 @@ class DataFrame private[sql](
   * Files that are written out using this method can be read back in as a [[DataFrame]]
   * using `read.parquet()` on a [[SQLContext]].
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().parquet()`.
   */
  @deprecated("Use write.parquet(path)", "1.4.0")
  def saveAsParquetFile(path: String): Unit = {
@ -1508,6 +1510,7 @@ class DataFrame private[sql](
   * Also note that while this function can persist the table metadata into Hive's metastore,
   * the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().saveAsTable(tableName)`.
   */
  @deprecated("Use write.saveAsTable(tableName)", "1.4.0")
  def saveAsTable(tableName: String): Unit = {
@ -1526,6 +1529,7 @@ class DataFrame private[sql](
   * Also note that while this function can persist the table metadata into Hive's metastore,
   * the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().mode(mode).saveAsTable(tableName)`.
   */
  @deprecated("Use write.mode(mode).saveAsTable(tableName)", "1.4.0")
  def saveAsTable(tableName: String, mode: SaveMode): Unit = {
@ -1545,6 +1549,7 @@ class DataFrame private[sql](
   * Also note that while this function can persist the table metadata into Hive's metastore,
   * the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().format(source).saveAsTable(tableName)`.
   */
  @deprecated("Use write.format(source).saveAsTable(tableName)", "1.4.0")
  def saveAsTable(tableName: String, source: String): Unit = {
@ -1564,6 +1569,7 @@ class DataFrame private[sql](
   * Also note that while this function can persist the table metadata into Hive's metastore,
   * the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
   * @group output
+   * @deprecated As of 1.4.0, replaced by
+   *            `write().format(source).mode(mode).saveAsTable(tableName)`.
   */
  @deprecated("Use write.format(source).mode(mode).saveAsTable(tableName)", "1.4.0")
  def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = {
@ -1582,6 +1588,8 @@ class DataFrame private[sql](
   * Also note that while this function can persist the table metadata into Hive's metastore,
   * the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
   * @group output
+   * @deprecated As of 1.4.0, replaced by
+   *            `write().format(source).mode(mode).options(options).saveAsTable(tableName)`.
   */
  @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName)",
    "1.4.0")
@ -1606,6 +1614,8 @@ class DataFrame private[sql](
   * Also note that while this function can persist the table metadata into Hive's metastore,
   * the table will NOT be accessible from Hive, until SPARK-7550 is resolved.
   * @group output
+   * @deprecated As of 1.4.0, replaced by
+   *            `write().format(source).mode(mode).options(options).saveAsTable(tableName)`.
   */
  @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName)",
    "1.4.0")
@ -1622,6 +1632,7 @@ class DataFrame private[sql](
   * using the default data source configured by spark.sql.sources.default and
   * [[SaveMode.ErrorIfExists]] as the save mode.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().save(path)`.
   */
  @deprecated("Use write.save(path)", "1.4.0")
  def save(path: String): Unit = {
@ -1632,6 +1643,7 @@ class DataFrame private[sql](
   * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode,
   * using the default data source configured by spark.sql.sources.default.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().mode(mode).save(path)`.
   */
  @deprecated("Use write.mode(mode).save(path)", "1.4.0")
  def save(path: String, mode: SaveMode): Unit = {
@ -1642,6 +1654,7 @@ class DataFrame private[sql](
   * Saves the contents of this DataFrame to the given path based on the given data source,
   * using [[SaveMode.ErrorIfExists]] as the save mode.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().format(source).save(path)`.
   */
  @deprecated("Use write.format(source).save(path)", "1.4.0")
  def save(path: String, source: String): Unit = {
@ -1652,6 +1665,7 @@ class DataFrame private[sql](
   * Saves the contents of this DataFrame to the given path based on the given data source and
   * [[SaveMode]] specified by mode.
   * @group output
+   * @deprecated As of 1.4.0, replaced by `write().format(source).mode(mode).save(path)`.
   */
  @deprecated("Use write.format(source).mode(mode).save(path)", "1.4.0")
  def save(path: String, source: String, mode: SaveMode): Unit = {
@ -1662,6 +1676,8 @@ class DataFrame private[sql](
   * Saves the contents of this DataFrame based on the given data source,
   * [[SaveMode]] specified by mode, and a set of options.
   * @group output
+   * @deprecated As of 1.4.0, replaced by
+   *            `write().format(source).mode(mode).options(options).save()`.
   */
  @deprecated("Use write.format(source).mode(mode).options(options).save()", "1.4.0")
  def save(
@ -1676,6 +1692,8 @@ class DataFrame private[sql](
   * Saves the contents of this DataFrame based on the given data source,
   * [[SaveMode]] specified by mode, and a set of options.
   * @group output
+   * @deprecated As of 1.4.0, replaced by
+   *            `write().format(source).mode(mode).options(options).save()`.
   */
  @deprecated("Use write.format(source).mode(mode).options(options).save()", "1.4.0")
  def save(
@ -1689,6 +1707,8 @@ class DataFrame private[sql](
  /**
   * Adds the rows from this RDD to the specified table, optionally overwriting the existing data.
   * @group output
+   * @deprecated As of 1.4.0, replaced by
+   *            `write().mode(SaveMode.Append|SaveMode.Overwrite).saveAsTable(tableName)`.
   */
  @deprecated("Use write.mode(SaveMode.Append|SaveMode.Overwrite).saveAsTable(tableName)", "1.4.0")
  def insertInto(tableName: String, overwrite: Boolean): Unit = {
@ -1699,6 +1719,8 @@ class DataFrame private[sql](
   * Adds the rows from this RDD to the specified table.
   * Throws an exception if the table already exists.
   * @group output
+   * @deprecated As of 1.4.0, replaced by
+   *            `write().mode(SaveMode.Append).saveAsTable(tableName)`.
   */
  @deprecated("Use write.mode(SaveMode.Append).saveAsTable(tableName)", "1.4.0")
  def insertInto(tableName: String): Unit = {
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@ -1021,21 +1021,33 @@ class SQLContext(@transient val sparkContext: SparkContext)
  ////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////

+  /**
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
+   */
  @deprecated("use createDataFrame", "1.3.0")
  def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
    createDataFrame(rowRDD, schema)
  }

+  /**
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
+   */
  @deprecated("use createDataFrame", "1.3.0")
  def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
    createDataFrame(rowRDD, schema)
  }

+  /**
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
+   */
  @deprecated("use createDataFrame", "1.3.0")
  def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
    createDataFrame(rdd, beanClass)
  }

+  /**
+   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
+   */
  @deprecated("use createDataFrame", "1.3.0")
  def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
    createDataFrame(rdd, beanClass)
@ -1046,6 +1058,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * [[DataFrame]] if no paths are passed in.
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().parquet()`.
   */
  @deprecated("Use read.parquet()", "1.4.0")
  @scala.annotation.varargs
@ -1065,6 +1078,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * It goes through the entire dataset once to determine the schema.
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonFile(path: String): DataFrame = {
@ -1076,6 +1090,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * returning the result as a [[DataFrame]].
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonFile(path: String, schema: StructType): DataFrame = {
@ -1084,6 +1099,7 @@ class SQLContext(@transient val sparkContext: SparkContext)

  /**
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonFile(path: String, samplingRatio: Double): DataFrame = {
@ -1096,6 +1112,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * It goes through the entire dataset once to determine the schema.
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
@ -1106,6 +1123,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * It goes through the entire dataset once to determine the schema.
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
@ -1115,6 +1133,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * returning the result as a [[DataFrame]].
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
@ -1126,6 +1145,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * schema, returning the result as a [[DataFrame]].
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
@ -1137,6 +1157,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * schema, returning the result as a [[DataFrame]].
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
@ -1148,6 +1169,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * schema, returning the result as a [[DataFrame]].
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().json()`.
   */
  @deprecated("Use read.json()", "1.4.0")
  def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
@ -1159,6 +1181,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * using the default data source configured by spark.sql.sources.default.
   *
   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by `read().load(path)`.
   */
  @deprecated("Use read.load(path)", "1.4.0")
  def load(path: String): DataFrame = {
@ -1169,6 +1192,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * Returns the dataset stored at path as a DataFrame, using the given data source.
   *
   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
   */
  @deprecated("Use read.format(source).load(path)", "1.4.0")
  def load(path: String, source: String): DataFrame = {
@ -1180,6 +1204,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * a set of options as a DataFrame.
   *
   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
   */
  @deprecated("Use read.format(source).options(options).load()", "1.4.0")
  def load(source: String, options: java.util.Map[String, String]): DataFrame = {
@ -1191,6 +1216,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * a set of options as a DataFrame.
   *
   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
   */
  @deprecated("Use read.format(source).options(options).load()", "1.4.0")
  def load(source: String, options: Map[String, String]): DataFrame = {
@ -1202,6 +1228,8 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
   *
   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by
+   *            `read().format(source).schema(schema).options(options).load()`.
   */
  @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0")
  def load(source: String, schema: StructType, options: java.util.Map[String, String]): DataFrame =
@ -1214,6 +1242,8 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
   *
   * @group genericdata
+   * @deprecated As of 1.4.0, replaced by
+   *            `read().format(source).schema(schema).options(options).load()`.
   */
  @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0")
  def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = {
@ -1225,6 +1255,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * url named table.
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
   */
  @deprecated("use read.jdbc()", "1.4.0")
  def jdbc(url: String, table: String): DataFrame = {
@ -1242,6 +1273,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * @param numPartitions the number of partitions.  the range `minValue`-`maxValue` will be split
   *                      evenly into this many partitions
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
   */
  @deprecated("use read.jdbc()", "1.4.0")
  def jdbc(
@ -1261,6 +1293,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   * of the [[DataFrame]].
   *
   * @group specificdata
+   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
   */
  @deprecated("use read.jdbc()", "1.4.0")
  def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {