[MINOR][SQL] Improve readability for window execution

### What changes were proposed in this pull request? I read the comments of `WindowExec` and found some comment will cause confusion and another need to improve. ### Why are the changes needed? This PR will enhance the readability and let developer works more easy ### Does this PR introduce any user-facing change? No ### How was this patch tested? No need Closes #27431 from beliefer/improve-window-readability. Authored-by: beliefer <beliefer@163.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
2020-02-19 14:26:27 +08:00 · 2020-02-19 14:26:27 +08:00 · 0894dbab2c
parent e065e22e5e
commit 0894dbab2c
3 changed files with 19 additions and 9 deletions
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala
@ -38,15 +38,25 @@ import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType,
 * - Entire partition: The frame is the entire partition, i.e.
 *   UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. For this case, window function will take all
 *   rows as inputs and be evaluated once.
- * - Growing frame: We only add new rows into the frame, i.e. UNBOUNDED PRECEDING AND ....
+ * - Growing frame: We only add new rows into the frame, Examples are:
+ *     1. UNBOUNDED PRECEDING AND 1 PRECEDING
+ *     2. UNBOUNDED PRECEDING AND CURRENT ROW
+ *     3. UNBOUNDED PRECEDING AND 1 FOLLOWING
 *   Every time we move to a new row to process, we add some rows to the frame. We do not remove
 *   rows from this frame.
- * - Shrinking frame: We only remove rows from the frame, i.e. ... AND UNBOUNDED FOLLOWING.
+ * - Shrinking frame: We only remove rows from the frame, Examples are:
+ *     1. 1 PRECEDING AND UNBOUNDED FOLLOWING
+ *     2. CURRENT ROW AND UNBOUNDED FOLLOWING
+ *     3. 1 FOLLOWING AND UNBOUNDED FOLLOWING
 *   Every time we move to a new row to process, we remove some rows from the frame. We do not add
 *   rows to this frame.
 * - Moving frame: Every time we move to a new row to process, we remove some rows from the frame
 *   and we add some rows to the frame. Examples are:
- *     1 PRECEDING AND CURRENT ROW and 1 FOLLOWING AND 2 FOLLOWING.
+ *     1. 2 PRECEDING AND 1 PRECEDING
+ *     2. 1 PRECEDING AND CURRENT ROW
+ *     3. CURRENT ROW AND 1 FOLLOWING
+ *     4. 1 PRECEDING AND 1 FOLLOWING
+ *     5. 1 FOLLOWING AND 2 FOLLOWING
 * - Offset frame: The frame consist of one row, which is an offset number of rows away from the
 *   current row. Only [[OffsetWindowFunction]]s can be processed in an offset frame.
 *
@ -105,7 +115,7 @@ case class WindowExec(
  override def outputPartitioning: Partitioning = child.outputPartitioning

  protected override def doExecute(): RDD[InternalRow] = {
-    // Unwrap the expressions and factories from the map.
+    // Unwrap the window expressions and window frame factories from the map.
    val expressions = windowFrameExpressionFactoryPairs.flatMap(_._1)
    val factories = windowFrameExpressionFactoryPairs.map(_._2).toArray
    val inMemoryThreshold = sqlContext.conf.windowExecBufferInMemoryThreshold
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
@ -114,7 +114,7 @@ abstract class WindowExecBase(

  /**
   * Collection containing an entry for each window frame to process. Each entry contains a frame's
-   * [[WindowExpression]]s and factory function for the WindowFrameFunction.
+   * [[WindowExpression]]s and factory function for the [[WindowFrameFunction]].
   */
  protected lazy val windowFrameExpressionFactoryPairs = {
    type FrameKey = (String, FrameType, Expression, Expression)
@ -170,7 +170,7 @@ abstract class WindowExecBase(
              MutableProjection.create(expressions, schema))
        }

-        // Create the factory
+        // Create the factory to produce WindowFunctionFrame.
        val factory = key match {
          // Offset Frame
          case ("OFFSET", _, IntegerLiteral(offset), _) =>
@ -223,7 +223,7 @@ abstract class WindowExecBase(
        // Keep track of the number of expressions. This is a side-effect in a map...
        numExpressions += expressions.size

-        // Create the Frame Expression - Factory pair.
+        // Create the Window Expression - Frame Factory pair.
        (expressions, factory)
    }
  }
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
@ -47,14 +47,14 @@ abstract class WindowFunctionFrame {
  /**
   * The current lower window bound in the row array (inclusive).
   *
-   * This should be called after the current row is updated via [[write]]
+   * This should be called after the current row is updated via `write`.
   */
  def currentLowerBound(): Int

  /**
   * The current row index of the upper window bound in the row array (exclusive)
   *
-   * This should be called after the current row is updated via [[write]]
+   * This should be called after the current row is updated via `write`.
   */
  def currentUpperBound(): Int
 }