[SPARK-23062][SQL] Improve EXCEPT documentation

## What changes were proposed in this pull request?

Make the default behavior of EXCEPT (i.e. EXCEPT DISTINCT) more explicit in the documentation, and call out the change in behavior from 1.x.

Author: Henry Robinson <henry@cloudera.com>

Closes #20254 from henryr/spark-23062.
2018-01-17 16:01:41 +08:00 · 2018-01-17 16:01:41 +08:00 · 1f3d933e0b
parent a0aedb0ded
commit 1f3d933e0b
3 changed files with 4 additions and 3 deletions
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2853,7 +2853,7 @@ setMethod("intersect",
 #' except
 #'
 #' Return a new SparkDataFrame containing rows in this SparkDataFrame
-#' but not in another SparkDataFrame. This is equivalent to \code{EXCEPT} in SQL.
+#' but not in another SparkDataFrame. This is equivalent to \code{EXCEPT DISTINCT} in SQL.
 #'
 #' @param x a SparkDataFrame.
 #' @param y a SparkDataFrame.
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1364,7 +1364,8 @@ class DataFrame(object):
        """ Return a new :class:`DataFrame` containing rows in this frame
        but not in another frame.

-        This is equivalent to `EXCEPT` in SQL.
+        This is equivalent to `EXCEPT DISTINCT` in SQL.
+
        """
        return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx)

--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1903,7 +1903,7 @@ class Dataset[T] private[sql](

  /**
   * Returns a new Dataset containing rows in this Dataset but not in another Dataset.
-   * This is equivalent to `EXCEPT` in SQL.
+   * This is equivalent to `EXCEPT DISTINCT` in SQL.
   *
   * @note Equality checking is performed directly on the encoded representation of the data
   * and thus is not affected by a custom `equals` function defined on `T`.