[MINOR] Fix Typos 'an -> a'
## What changes were proposed in this pull request?

`an -> a`

Use cmds like `find . -name '*.R' | xargs -i sh -c "grep -in ' an [^aeiou]' {} && echo {}"` to generate candidates, and review them one by one.

## How was this patch tested?

manual tests

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #13515 from zhengruifeng/an_a.
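For reference, the same candidate search can be sketched outside of shell. The snippet below is a hypothetical illustration only (not part of this patch): it walks source files and flags lines matching the ` an [^aeiou]` pattern used above; the function name, the extension list, and the exact regex are assumptions made for the example.

```python
# Hypothetical helper mirroring `grep -in ' an [^aeiou]'` above; names are illustrative.
import os
import re

PATTERN = re.compile(r" an [^aeiou]", re.IGNORECASE)  # ' an ' followed by a non-vowel
EXTENSIONS = (".scala", ".py", ".R")                   # source types touched by this patch

def find_candidates(root="."):
    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            if name.endswith(EXTENSIONS):
                path = os.path.join(dirpath, name)
                with open(path, encoding="utf-8", errors="ignore") as f:
                    for lineno, line in enumerate(f, 1):
                        if PATTERN.search(line):
                            # Candidates only: each hit still needs manual review,
                            # e.g. "an RDD" is correct and must stay as-is.
                            yield path, lineno, line.rstrip()

if __name__ == "__main__":
    for path, lineno, line in find_candidates():
        print(f"{path}:{lineno}: {line}")
```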
This commit is contained in:
parent 32f2f95dbd
commit fd8af39713
@@ -489,7 +489,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
 # checkedFunc An environment of function objects examined during cleanClosure. It can be
 # considered as a "name"-to-"list of functions" mapping.
 # return value
-# a new version of func that has an correct environment (closure).
+# a new version of func that has a correct environment (closure).
 cleanClosure <- function(func, checkedFuncs = new.env()) {
 if (is.function(func)) {
 newEnv <- new.env(parent = .GlobalEnv)
@@ -28,7 +28,7 @@ import org.apache.spark.util.{AccumulatorContext, AccumulatorMetadata, LegacyAcc

 /**
- * A data type that can be accumulated, i.e. has an commutative and associative "add" operation,
+ * A data type that can be accumulated, i.e. has a commutative and associative "add" operation,
  * but where the result type, `R`, may be different from the element type being added, `T`.
  *
  * You must define how to add data, and how to merge two of these together. For some data types,

@@ -789,7 +789,7 @@ class JavaSparkContext(val sc: SparkContext)
 def cancelAllJobs(): Unit = sc.cancelAllJobs()

 /**
- * Returns an Java map of JavaRDDs that have marked themselves as persistent via cache() call.
+ * Returns a Java map of JavaRDDs that have marked themselves as persistent via cache() call.
  * Note that this does not necessarily mean the caching or computation was successful.
  */
 def getPersistentRDDs: JMap[java.lang.Integer, JavaRDD[_]] = {

@@ -919,7 +919,7 @@ private class PythonAccumulatorParam(@transient private val serverHost: String,
 }

 /**
- * An Wrapper for Python Broadcast, which is written into disk by Python. It also will
+ * A Wrapper for Python Broadcast, which is written into disk by Python. It also will
  * write the data into disk after deserialization, then Python can read it from disks.
  */
 // scalastyle:off no.finalize
@@ -408,12 +408,12 @@ object SparkSubmit {
 printErrorAndExit("SparkR is not supported for Mesos cluster.")
 }

-// If we're running a R app, set the main class to our specific R runner
+// If we're running an R app, set the main class to our specific R runner
 if (args.isR && deployMode == CLIENT) {
 if (args.primaryResource == SPARKR_SHELL) {
 args.mainClass = "org.apache.spark.api.r.RBackend"
 } else {
-// If a R file is provided, add it to the child arguments and list of files to deploy.
+// If an R file is provided, add it to the child arguments and list of files to deploy.
 // Usage: RRunner <main R file> [app arguments]
 args.mainClass = "org.apache.spark.deploy.RRunner"
 args.childArgs = ArrayBuffer(args.primaryResource) ++ args.childArgs

@@ -422,7 +422,7 @@ object SparkSubmit {
 }

 if (isYarnCluster && args.isR) {
-// In yarn-cluster mode for a R app, add primary resource to files
+// In yarn-cluster mode for an R app, add primary resource to files
 // that can be distributed with the job
 args.files = mergeFileLists(args.files, args.primaryResource)
 }
@@ -34,7 +34,7 @@ private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) e

 // TODO: Expose a jdbcRDD function in SparkContext and mark this as semi-private
 /**
- * An RDD that executes an SQL query on a JDBC connection and reads results.
+ * An RDD that executes a SQL query on a JDBC connection and reads results.
  * For usage example, see test case JdbcRDDSuite.
  *
  * @param getConnection a function that returns an open Connection.

@@ -138,7 +138,7 @@ object JdbcRDD {
 }

 /**
- * Create an RDD that executes an SQL query on a JDBC connection and reads results.
+ * Create an RDD that executes a SQL query on a JDBC connection and reads results.
  * For usage example, see test case JavaAPISuite.testJavaJdbcRDD.
  *
  * @param connectionFactory a factory that returns an open Connection.

@@ -178,7 +178,7 @@ object JdbcRDD {
 }

 /**
- * Create an RDD that executes an SQL query on a JDBC connection and reads results. Each row is
+ * Create an RDD that executes a SQL query on a JDBC connection and reads results. Each row is
  * converted into a `Object` array. For usage example, see test case JavaAPISuite.testJavaJdbcRDD.
  *
  * @param connectionFactory a factory that returns an open Connection.

@@ -26,7 +26,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.scheduler.SchedulingMode.SchedulingMode

 /**
- * An Schedulable entity that represents collection of Pools or TaskSetManagers
+ * A Schedulable entity that represents collection of Pools or TaskSetManagers
  */
 private[spark] class Pool(
 val poolName: String,
@@ -138,7 +138,7 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext {
 }

 /**
- * Verify the persistence of state associated with an TorrentBroadcast in a local-cluster.
+ * Verify the persistence of state associated with a TorrentBroadcast in a local-cluster.
  *
  * This test creates a broadcast variable, uses it on all executors, and then unpersists it.
  * In between each step, this test verifies that the broadcast blocks are present only on the

@@ -408,7 +408,7 @@ class StandaloneRestSubmitSuite extends SparkFunSuite with BeforeAndAfterEach {

 /**
  * Start a [[StandaloneRestServer]] that communicates with the given endpoint.
- * If `faulty` is true, start an [[FaultyStandaloneRestServer]] instead.
+ * If `faulty` is true, start a [[FaultyStandaloneRestServer]] instead.
  * Return the master URL that corresponds to the address of this server.
  */
 private def startServer(

@@ -489,7 +489,7 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll {
 /**
  * Setup an [[RpcEndpoint]] to collect all network events.
  *
- * @return the [[RpcEndpointRef]] and an `ConcurrentLinkedQueue` that contains network events.
+ * @return the [[RpcEndpointRef]] and a `ConcurrentLinkedQueue` that contains network events.
  */
 private def setupNetworkEndpoint(
 _env: RpcEnv,
@@ -1712,7 +1712,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
 }

 test("reduce tasks should be placed locally with map output") {
-// Create an shuffleMapRdd with 1 partition
+// Create a shuffleMapRdd with 1 partition
 val shuffleMapRdd = new MyRDD(sc, 1, Nil)
 val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2))
 val shuffleId = shuffleDep.shuffleId

@@ -1733,7 +1733,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou

 test("reduce task locality preferences should only include machines with largest map outputs") {
 val numMapTasks = 4
-// Create an shuffleMapRdd with more partitions
+// Create a shuffleMapRdd with more partitions
 val shuffleMapRdd = new MyRDD(sc, numMapTasks, Nil)
 val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(1))
 val shuffleId = shuffleDep.shuffleId

@@ -258,7 +258,7 @@ class JsonProtocolSuite extends SparkFunSuite {
 }

 test("FetchFailed backwards compatibility") {
-// FetchFailed in Spark 1.1.0 does not have an "Message" property.
+// FetchFailed in Spark 1.1.0 does not have a "Message" property.
 val fetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 18, 19,
 "ignored")
 val oldEvent = JsonProtocol.taskEndReasonToJson(fetchFailed)
@@ -26,7 +26,7 @@ import org.apache.spark.streaming.flume.sink._
 /**
  * This class implements the core functionality of [[FlumePollingReceiver]]. When started it
  * pulls data from Flume, stores it to Spark and then sends an Ack or Nack. This class should be
- * run via an [[java.util.concurrent.Executor]] as this implements [[Runnable]]
+ * run via a [[java.util.concurrent.Executor]] as this implements [[Runnable]]
  *
  * @param receiver The receiver that owns this instance.
  */

@@ -27,7 +27,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.util.collection.BitSet

 /**
- * An class containing additional operations for subclasses of VertexPartitionBase that provide
+ * A class containing additional operations for subclasses of VertexPartitionBase that provide
  * implicit evidence of membership in the `VertexPartitionBaseOpsConstructor` typeclass (for
  * example, [[VertexPartition.VertexPartitionOpsConstructor]]).
  */
@@ -548,7 +548,7 @@ object DenseVector {
 }

 /**
- * A sparse vector represented by an index array and an value array.
+ * A sparse vector represented by an index array and a value array.
  *
  * @param size size of the vector.
  * @param indices index array, assume to be strictly increasing.

@@ -82,7 +82,7 @@ abstract class PipelineStage extends Params with Logging {
  * be called on the input dataset to fit a model. Then the model, which is a transformer, will be
  * used to transform the dataset as the input to the next stage. If a stage is a [[Transformer]],
  * its [[Transformer#transform]] method will be called to produce the dataset for the next stage.
- * The fitted model from a [[Pipeline]] is an [[PipelineModel]], which consists of fitted models and
+ * The fitted model from a [[Pipeline]] is a [[PipelineModel]], which consists of fitted models and
  * transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as
  * an identity transformer.
  */
@@ -854,7 +854,7 @@ class BinaryLogisticRegressionSummary private[classification] (

 /**
  * Returns the receiver operating characteristic (ROC) curve,
- * which is an Dataframe having two fields (FPR, TPR)
+ * which is a Dataframe having two fields (FPR, TPR)
  * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
  *
  * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].

@@ -874,7 +874,7 @@ class BinaryLogisticRegressionSummary private[classification] (
 lazy val areaUnderROC: Double = binaryMetrics.areaUnderROC()

 /**
- * Returns the precision-recall curve, which is an Dataframe containing
+ * Returns the precision-recall curve, which is a Dataframe containing
  * two fields recall, precision with (0.0, 1.0) prepended to it.
  *
  * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].

@@ -491,7 +491,7 @@ private[spark] object RandomForest extends Logging {
 timer.start("chooseSplits")

 // In each partition, iterate all instances and compute aggregate stats for each node,
-// yield an (nodeIndex, nodeAggregateStats) pair for each node.
+// yield a (nodeIndex, nodeAggregateStats) pair for each node.
 // After a `reduceByKey` operation,
 // stats of a node will be shuffled to a particular partition and be combined together,
 // then best splits for nodes are found there.
@@ -86,7 +86,7 @@ class LogisticRegressionModel @Since("1.3.0") (
 /**
  * Sets the threshold that separates positive predictions from negative predictions
  * in Binary Logistic Regression. An example with prediction score greater than or equal to
- * this threshold is identified as an positive, and negative otherwise. The default value is 0.5.
+ * this threshold is identified as a positive, and negative otherwise. The default value is 0.5.
  * It is only used for binary classification.
  */
 @Since("1.0.0")

@@ -44,7 +44,7 @@ class SVMModel @Since("1.1.0") (

 /**
  * Sets the threshold that separates positive predictions from negative predictions. An example
- * with prediction score greater than or equal to this threshold is identified as an positive,
+ * with prediction score greater than or equal to this threshold is identified as a positive,
  * and negative otherwise. The default value is 0.0.
  */
 @Since("1.0.0")

@@ -53,7 +53,7 @@ trait VectorTransformer extends Serializable {
 }

 /**
- * Applies transformation on an JavaRDD[Vector].
+ * Applies transformation on a JavaRDD[Vector].
  *
  * @param data JavaRDD[Vector] to be transformed.
  * @return transformed JavaRDD[Vector].
@@ -731,7 +731,7 @@ object DenseVector {
 }

 /**
- * A sparse vector represented by an index array and an value array.
+ * A sparse vector represented by an index array and a value array.
  *
  * @param size size of the vector.
  * @param indices index array, assume to be strictly increasing.

@@ -24,7 +24,7 @@ import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors}
 import org.apache.spark.rdd.RDD

 /**
- * Represents an entry in an distributed matrix.
+ * Represents an entry in a distributed matrix.
  * @param i row index
  * @param j column index
  * @param value value of the entry

@@ -47,7 +47,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
 combOp = (queue1, queue2) => {
 queue1 ++= queue2
 }
-).mapValues(_.toArray.sorted(ord.reverse)) // This is an min-heap, so we reverse the order.
+).mapValues(_.toArray.sorted(ord.reverse)) // This is a min-heap, so we reverse the order.
 }
 }
@@ -351,7 +351,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
 def roc(self):
 """
 Returns the receiver operating characteristic (ROC) curve,
-which is an Dataframe having two fields (FPR, TPR) with
+which is a Dataframe having two fields (FPR, TPR) with
 (0.0, 0.0) prepended and (1.0, 1.0) appended to it.

 .. seealso:: `Wikipedia reference \

@@ -380,7 +380,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
 @since("2.0.0")
 def pr(self):
 """
-Returns the precision-recall curve, which is an Dataframe
+Returns the precision-recall curve, which is a Dataframe
 containing two fields recall, precision with (0.0, 1.0) prepended
 to it.

@@ -42,7 +42,7 @@ class Pipeline(Estimator, MLReadable, MLWritable):
 stage. If a stage is a :py:class:`Transformer`, its
 :py:meth:`Transformer.transform` method will be called to produce
 the dataset for the next stage. The fitted model from a
-:py:class:`Pipeline` is an :py:class:`PipelineModel`, which
+:py:class:`Pipeline` is a :py:class:`PipelineModel`, which
 consists of fitted models and transformers, corresponding to the
 pipeline stages. If there are no stages, the pipeline acts as an
 identity transformer.
@@ -52,7 +52,7 @@ class LinearClassificationModel(LinearModel):

 Sets the threshold that separates positive predictions from
 negative predictions. An example with prediction score greater
-than or equal to this threshold is identified as an positive,
+than or equal to this threshold is identified as a positive,
 and negative otherwise. It is used for binary classification
 only.
 """

@@ -60,7 +60,7 @@ _picklable_classes = [

 # this will call the MLlib version of pythonToJava()
 def _to_java_object_rdd(rdd):
-""" Return an JavaRDD of Object by unpickling
+""" Return a JavaRDD of Object by unpickling

 It will convert each Python object into Java object by Pyrolite, whenever the
 RDD is serialized in batch or not.
@@ -1040,7 +1040,7 @@ class RDD(object):
 If the elements in RDD do not vary (max == min) always returns
 a single bucket.

-It will return an tuple of buckets and histogram.
+It will return a tuple of buckets and histogram.

 >>> rdd = sc.parallelize(range(51))
 >>> rdd.histogram(2)

@@ -2211,7 +2211,7 @@ class RDD(object):
 return values.collect()

 def _to_java_object_rdd(self):
-""" Return an JavaRDD of Object by unpickling
+""" Return a JavaRDD of Object by unpickling

 It will convert each Python object into Java object by Pyrolite, whenever the
 RDD is serialized in batch or not.

@@ -360,7 +360,7 @@ class SparkSession(object):

 def _createFromLocal(self, data, schema):
 """
-Create an RDD for DataFrame from an list or pandas.DataFrame, returns
+Create an RDD for DataFrame from a list or pandas.DataFrame, returns
 the RDD and schema.
 """
 # make sure data could consumed multiple times
@@ -72,7 +72,7 @@ class ContinuousQuery(object):

 @since(2.0)
 def processAllAvailable(self):
-"""Blocks until all available data in the source has been processed an committed to the
+"""Blocks until all available data in the source has been processed and committed to the
 sink. This method is intended for testing. Note that in the case of continually arriving
 data, this method may block forever. Additionally, this method is only guaranteed to block
 until data that has been synchronously appended data to a stream source prior to invocation.

@@ -1046,7 +1046,7 @@ def _need_converter(dataType):


 def _create_converter(dataType):
-"""Create an converter to drop the names of fields in obj """
+"""Create a converter to drop the names of fields in obj """
 if not _need_converter(dataType):
 return lambda x: x

@@ -608,8 +608,8 @@ class DStream(object):

 class TransformedDStream(DStream):
 """
-TransformedDStream is an DStream generated by an Python function
-transforming each RDD of an DStream to another RDDs.
+TransformedDStream is a DStream generated by an Python function
+transforming each RDD of a DStream to another RDDs.

 Multiple continuous transformations of DStream can be combined into
 one transformation.
@@ -300,7 +300,7 @@ trait Row extends Serializable {
 getMap[K, V](i).asJava

 /**
- * Returns the value at position i of struct type as an [[Row]] object.
+ * Returns the value at position i of struct type as a [[Row]] object.
  *
  * @throws ClassCastException when data type does not match.
  */

@@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.toPrettySQL
 import org.apache.spark.sql.types._

 /**
- * A trivial [[Analyzer]] with an dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]].
+ * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]].
  * Used for testing when all relations are already filled in and the analyzer needs only
  * to resolve attribute references.
  */

@@ -1496,7 +1496,7 @@ class Analyzer(
  * This rule handles three cases:
  * - A [[Project]] having [[WindowExpression]]s in its projectList;
  * - An [[Aggregate]] having [[WindowExpression]]s in its aggregateExpressions.
- * - An [[Filter]]->[[Aggregate]] pattern representing GROUP BY with a HAVING
+ * - A [[Filter]]->[[Aggregate]] pattern representing GROUP BY with a HAVING
  * clause and the [[Aggregate]] has [[WindowExpression]]s in its aggregateExpressions.
  * Note: If there is a GROUP BY clause in the query, aggregations and corresponding
  * filters (expressions in the HAVING clause) should be evaluated before any
@@ -410,7 +410,7 @@ object FunctionRegistry {
 case Failure(e) => throw new AnalysisException(e.getMessage)
 }
 } else {
-// Otherwise, find an ctor method that matches the number of arguments, and use that.
+// Otherwise, find a constructor method that matches the number of arguments, and use that.
 val params = Seq.fill(expressions.size)(classOf[Expression])
 val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match {
 case Success(e) =>

@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

 /**
- * A trait that should be mixed into query operators where an single instance might appear multiple
+ * A trait that should be mixed into query operators where a single instance might appear multiple
  * times in a logical query plan. It is invalid to have multiple copies of the same attribute
  * produced by distinct operators in a query tree as this breaks the guarantee that expression
  * ids, which are used to differentiate attributes, are unique.
@@ -241,7 +241,7 @@ class SessionCatalog(
 /**
  * Retrieve the metadata of an existing metastore table.
  * If no database is specified, assume the table is in the current database.
- * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown.
+ * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown.
  */
 def getTableMetadata(name: TableIdentifier): CatalogTable = {
 val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase))

@@ -266,7 +266,7 @@ class SessionCatalog(
 /**
  * Load files stored in given path into an existing metastore table.
  * If no database is specified, assume the table is in the current database.
- * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown.
+ * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown.
  */
 def loadTable(
 name: TableIdentifier,

@@ -283,7 +283,7 @@ class SessionCatalog(
 /**
  * Load files stored in given path into the partition of an existing metastore table.
  * If no database is specified, assume the table is in the current database.
- * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown.
+ * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown.
  */
 def loadPartition(
 name: TableIdentifier,

@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.catalog

 import org.apache.spark.sql.AnalysisException

-/** An trait that represents the type of a resourced needed by a function. */
+/** A trait that represents the type of a resourced needed by a function. */
 abstract class FunctionResourceType(val resourceType: String)

 object JarResource extends FunctionResourceType("jar")
@@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.types.AbstractDataType

 /**
- * An trait that gets mixin to define the expected input types of an expression.
+ * A trait that gets mixin to define the expected input types of an expression.
  *
  * This trait is typically used by operator expressions (e.g. [[Add]], [[Subtract]]) to define
  * expected input types without any implicit casting.

@@ -158,7 +158,7 @@ object UnsafeProjection {
 object FromUnsafeProjection {

 /**
- * Returns an Projection for given StructType.
+ * Returns a Projection for given StructType.
  */
 def apply(schema: StructType): Projection = {
 apply(schema.fields.map(_.dataType))

@@ -172,7 +172,7 @@ object FromUnsafeProjection {
 }

 /**
- * Returns an Projection for given sequence of Expressions (bounded).
+ * Returns a Projection for given sequence of Expressions (bounded).
  */
 private def create(exprs: Seq[Expression]): Projection = {
 GenerateSafeProjection.generate(exprs)
@@ -91,7 +91,7 @@ case class CreateMap(children: Seq[Expression]) extends Expression {

 override def checkInputDataTypes(): TypeCheckResult = {
 if (children.size % 2 != 0) {
-TypeCheckResult.TypeCheckFailure(s"$prettyName expects an positive even number of arguments.")
+TypeCheckResult.TypeCheckFailure(s"$prettyName expects a positive even number of arguments.")
 } else if (keys.map(_.dataType).distinct.length > 1) {
 TypeCheckResult.TypeCheckFailure("The given keys of function map should all be the same " +
 "type, but they are " + keys.map(_.dataType.simpleString).mkString("[", ", ", "]"))

@@ -148,7 +148,7 @@ abstract class NumericType extends AtomicType {
 // implicitly[Numeric[JvmType]] to be valid, we have to change JvmType from a type variable to a
 // type parameter and add a numeric annotation (i.e., [JvmType : Numeric]). This gets
 // desugared by the compiler into an argument to the objects constructor. This means there is no
-// longer an no argument constructor and thus the JVM cannot serialize the object anymore.
+// longer a no argument constructor and thus the JVM cannot serialize the object anymore.
 private[sql] val numeric: Numeric[InternalType]
 }
@@ -314,7 +314,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
 def json(paths: String*): DataFrame = format("json").load(paths : _*)

 /**
- * Loads an `JavaRDD[String]` storing JSON objects (one object per record) and
+ * Loads a `JavaRDD[String]` storing JSON objects (one object per record) and
  * returns the result as a [[DataFrame]].
  *
  * Unless the schema is specified using [[schema]] function, this function goes through the
@@ -370,7 +370,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)

 /**
  * :: DeveloperApi ::
- * Creates a [[DataFrame]] from an [[JavaRDD]] containing [[Row]]s using the given schema.
+ * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema.
  * It is important to make sure that the structure of every [[Row]] of the provided RDD matches
  * the provided schema. Otherwise, there will be runtime exception.
  *

@@ -384,7 +384,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)

 /**
  * :: DeveloperApi ::
- * Creates a [[DataFrame]] from an [[java.util.List]] containing [[Row]]s using the given schema.
+ * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema.
  * It is important to make sure that the structure of every [[Row]] of the provided List matches
  * the provided schema. Otherwise, there will be runtime exception.
  *

@@ -421,7 +421,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
 }

 /**
- * Applies a schema to an List of Java Beans.
+ * Applies a schema to a List of Java Beans.
  *
  * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
  * SELECT * queries will return the columns in an undefined order.

@@ -552,7 +552,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
 /**
  * :: Experimental ::
  * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
- * in an range from 0 to `end` (exclusive) with step value 1.
+ * in a range from 0 to `end` (exclusive) with step value 1.
  *
  * @since 2.0.0
  * @group dataset

@@ -563,7 +563,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
 /**
  * :: Experimental ::
  * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
- * in an range from `start` to `end` (exclusive) with step value 1.
+ * in a range from `start` to `end` (exclusive) with step value 1.
  *
  * @since 2.0.0
  * @group dataset

@@ -574,7 +574,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
 /**
  * :: Experimental ::
  * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
- * in an range from `start` to `end` (exclusive) with an step value.
+ * in a range from `start` to `end` (exclusive) with a step value.
  *
  * @since 2.0.0
  * @group dataset

@@ -587,7 +587,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
 /**
  * :: Experimental ::
  * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
- * in an range from `start` to `end` (exclusive) with an step value, with partition number
+ * in a range from `start` to `end` (exclusive) with a step value, with partition number
  * specified.
  *
  * @since 2.0.0
@@ -33,7 +33,7 @@ abstract class SQLImplicits {
 protected def _sqlContext: SQLContext

 /**
- * Converts $"col name" into an [[Column]].
+ * Converts $"col name" into a [[Column]].
  *
  * @since 2.0.0
  */
@@ -305,7 +305,7 @@ class SparkSession private(

 /**
  * :: DeveloperApi ::
- * Creates a [[DataFrame]] from an [[JavaRDD]] containing [[Row]]s using the given schema.
+ * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema.
  * It is important to make sure that the structure of every [[Row]] of the provided RDD matches
  * the provided schema. Otherwise, there will be runtime exception.
  *

@@ -319,7 +319,7 @@ class SparkSession private(

 /**
  * :: DeveloperApi ::
- * Creates a [[DataFrame]] from an [[java.util.List]] containing [[Row]]s using the given schema.
+ * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema.
  * It is important to make sure that the structure of every [[Row]] of the provided List matches
  * the provided schema. Otherwise, there will be runtime exception.
  *

@@ -365,7 +365,7 @@ class SparkSession private(
 }

 /**
- * Applies a schema to an List of Java Beans.
+ * Applies a schema to a List of Java Beans.
  *
  * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
  * SELECT * queries will return the columns in an undefined order.

@@ -475,7 +475,7 @@ class SparkSession private(
 /**
  * :: Experimental ::
  * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
- * in an range from 0 to `end` (exclusive) with step value 1.
+ * in a range from 0 to `end` (exclusive) with step value 1.
  *
  * @since 2.0.0
  * @group dataset

@@ -486,7 +486,7 @@ class SparkSession private(
 /**
  * :: Experimental ::
  * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
- * in an range from `start` to `end` (exclusive) with step value 1.
+ * in a range from `start` to `end` (exclusive) with step value 1.
  *
  * @since 2.0.0
  * @group dataset

@@ -499,7 +499,7 @@ class SparkSession private(
 /**
  * :: Experimental ::
  * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
- * in an range from `start` to `end` (exclusive) with an step value.
+ * in a range from `start` to `end` (exclusive) with a step value.
  *
  * @since 2.0.0
  * @group dataset

@@ -512,7 +512,7 @@ class SparkSession private(
 /**
  * :: Experimental ::
  * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
- * in an range from `start` to `end` (exclusive) with an step value, with partition number
+ * in a range from `start` to `end` (exclusive) with a step value, with partition number
  * specified.
  *
  * @since 2.0.0
@@ -274,7 +274,7 @@ class SQLBuilder(logicalPlan: LogicalPlan) extends Logging {
 // 5. the table alias for output columns of generator.
 // 6. the AS keyword
 // 7. the column alias, can be more than one, e.g. AS key, value
-// An concrete example: "tbl LATERAL VIEW EXPLODE(map_col) sub_q AS key, value", and the builder
+// A concrete example: "tbl LATERAL VIEW EXPLODE(map_col) sub_q AS key, value", and the builder
 // will put it in FROM clause later.
 build(
 childSQL,

@@ -86,7 +86,7 @@ class SortBasedAggregationIterator(
 // The aggregation buffer used by the sort-based aggregation.
 private[this] val sortBasedAggregationBuffer: MutableRow = newBuffer

-// An SafeProjection to turn UnsafeRow into GenericInternalRow, because UnsafeRow can't be
+// A SafeProjection to turn UnsafeRow into GenericInternalRow, because UnsafeRow can't be
 // compared to MutableRow (aggregation buffer) directly.
 private[this] val safeProj: Projection = FromUnsafeProjection(valueAttributes.map(_.dataType))

@@ -202,7 +202,7 @@ sealed trait BufferSetterGetterUtils {
 }

 /**
- * A Mutable [[Row]] representing an mutable aggregation buffer.
+ * A Mutable [[Row]] representing a mutable aggregation buffer.
  */
 private[sql] class MutableAggregationBufferImpl (
 schema: StructType,
@@ -58,7 +58,7 @@ class MutableUnsafeRow(val writer: UnsafeRowWriter) extends GenericMutableRow(nu
 }

 /**
- * Generates bytecode for an [[ColumnarIterator]] for columnar cache.
+ * Generates bytecode for a [[ColumnarIterator]] for columnar cache.
  */
 object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarIterator] with Logging {

@@ -41,7 +41,7 @@ import org.apache.spark.sql.execution.SparkPlan
  * is only done on top level columns, but formats should support pruning of nested columns as
  * well.
  * - Construct a reader function by passing filters and the schema into the FileFormat.
- * - Using an partition pruning predicates, enumerate the list of files that should be read.
+ * - Using a partition pruning predicates, enumerate the list of files that should be read.
  * - Split the files into tasks and construct a FileScanRDD.
  * - Add any projection or filters that must be evaluated after the scan.
  *

@@ -50,7 +50,7 @@ object JacksonParser extends Logging {

 /**
  * Parse the current token (and related children) according to a desired schema
- * This is an wrapper for the method `convertField()` to handle a row wrapped
+ * This is a wrapper for the method `convertField()` to handle a row wrapped
  * with an array.
  */
 def convertRootField(
@@ -68,7 +68,7 @@ private[parquet] trait HasParentContainerUpdater {
 }

 /**
- * A convenient converter class for Parquet group types with an [[HasParentContainerUpdater]].
+ * A convenient converter class for Parquet group types with a [[HasParentContainerUpdater]].
  */
 private[parquet] abstract class CatalystGroupConverter(val updater: ParentContainerUpdater)
 extends GroupConverter with HasParentContainerUpdater

@@ -47,10 +47,10 @@ import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan}
  * partitions.
  *
  * The workflow of this coordinator is described as follows:
- * - Before the execution of a [[SparkPlan]], for an [[ShuffleExchange]] operator,
+ * - Before the execution of a [[SparkPlan]], for a [[ShuffleExchange]] operator,
  * if an [[ExchangeCoordinator]] is assigned to it, it registers itself to this coordinator.
  * This happens in the `doPrepare` method.
- * - Once we start to execute a physical plan, an [[ShuffleExchange]] registered to this
+ * - Once we start to execute a physical plan, a [[ShuffleExchange]] registered to this
  * coordinator will call `postShuffleRDD` to get its corresponding post-shuffle
  * [[ShuffledRowRDD]].
  * If this coordinator has made the decision on how to shuffle data, this [[ShuffleExchange]]

@@ -61,7 +61,7 @@ import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan}
  * post-shuffle partitions and pack multiple pre-shuffle partitions with continuous indices
  * to a single post-shuffle partition whenever necessary.
  * - Finally, this coordinator will create post-shuffle [[ShuffledRowRDD]]s for all registered
- * [[ShuffleExchange]]s. So, when an [[ShuffleExchange]] calls `postShuffleRDD`, this coordinator
+ * [[ShuffleExchange]]s. So, when a [[ShuffleExchange]] calls `postShuffleRDD`, this coordinator
  * can lookup the corresponding [[RDD]].
  *
  * The strategy used to determine the number of post-shuffle partitions is described as follows.

@@ -98,8 +98,8 @@ private[sql] class ExchangeCoordinator(
 @volatile private[this] var estimated: Boolean = false

 /**
- * Registers an [[ShuffleExchange]] operator to this coordinator. This method is only allowed to
- * be called in the `doPrepare` method of an [[ShuffleExchange]] operator.
+ * Registers a [[ShuffleExchange]] operator to this coordinator. This method is only allowed to
+ * be called in the `doPrepare` method of a [[ShuffleExchange]] operator.
  */
 @GuardedBy("this")
 def registerExchange(exchange: ShuffleExchange): Unit = synchronized {
@@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.util.collection.BitSet

 /**
- * Performs an sort merge join of two child relations.
+ * Performs a sort merge join of two child relations.
  */
 case class SortMergeJoinExec(
 leftKeys: Seq[Expression],

@@ -40,7 +40,7 @@ private[sql] case class MapPartitionsRWrapper(

 val (newIter, deserializer, colNames) =
 if (!isSerializedRData) {
-// Serialize each row into an byte array that can be deserialized in the R worker
+// Serialize each row into a byte array that can be deserialized in the R worker
 (iter.asInstanceOf[Iterator[Row]].map {row => rowToRBytes(row)},
 SerializationFormats.ROW, inputSchema.fieldNames)
 } else {

@@ -133,7 +133,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {

 /**
  * :: Experimental ::
- * A [[Row]] representing an mutable aggregation buffer.
+ * A [[Row]] representing a mutable aggregation buffer.
  *
  * This is not meant to be extended outside of Spark.
  */
@@ -52,7 +52,7 @@ private[sql] class SharedState(val sparkContext: SparkContext) {
 org.apache.spark.util.Utils.getContextOrSparkClassLoader)

 /**
- * Create a SQLListener then add it into SparkContext, and create an SQLTab if there is SparkUI.
+ * Create a SQLListener then add it into SparkContext, and create a SQLTab if there is SparkUI.
  */
 private def createListenerAndUI(sc: SparkContext): SQLListener = {
 if (SparkSession.sqlListener.get() == null) {

@@ -93,7 +93,7 @@ trait ContinuousQuery {
 def awaitTermination(timeoutMs: Long): Boolean

 /**
- * Blocks until all available data in the source has been processed an committed to the sink.
+ * Blocks until all available data in the source has been processed and committed to the sink.
  * This method is intended for testing. Note that in the case of continually arriving data, this
  * method may block forever. Additionally, this method is only guaranteed to block until data that
  * has been synchronously appended data to a [[org.apache.spark.sql.execution.streaming.Source]]

@@ -218,7 +218,7 @@ private[hive] trait HiveClient {
 /** Create a function in an existing database. */
 def createFunction(db: String, func: CatalogFunction): Unit

-/** Drop an existing function an the database. */
+/** Drop an existing function in the database. */
 def dropFunction(db: String, name: String): Unit

 /** Rename an existing function in the database. */
@@ -30,7 +30,7 @@ import org.apache.spark.sql.types.StructType
 private[orc] object OrcFileOperator extends Logging {
 /**
  * Retrieves an ORC file reader from a given path. The path can point to either a directory or a
- * single ORC file. If it points to an directory, it picks any non-empty ORC file within that
+ * single ORC file. If it points to a directory, it picks any non-empty ORC file within that
  * directory.
  *
  * The reader returned by this method is mainly used for two purposes:

@@ -37,7 +37,7 @@ import org.apache.spark.sql.hive.test.{TestHive, TestHiveQueryExecution}
  * Allows the creations of tests that execute the same query against both hive
  * and catalyst, comparing the results.
  *
- * The "golden" results from Hive are cached in an retrieved both from the classpath and
+ * The "golden" results from Hive are cached in and retrieved both from the classpath and
  * [[answerCache]] to speed up testing.
  *
  * See the documentation of public vals in this class for information on how test execution can be
@@ -120,7 +120,7 @@ sealed abstract class State[S] {
 def isTimingOut(): Boolean

 /**
- * Get the state as an [[scala.Option]]. It will be `Some(state)` if it exists, otherwise `None`.
+ * Get the state as a [[scala.Option]]. It will be `Some(state)` if it exists, otherwise `None`.
  */
 @inline final def getOption(): Option[S] = if (exists) Some(get()) else None

@@ -349,7 +349,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable {
 }

 /**
- * Create an input stream from an queue of RDDs. In each batch,
+ * Create an input stream from a queue of RDDs. In each batch,
  * it will process either one or all of the RDDs returned by the queue.
  *
  * NOTE:

@@ -369,7 +369,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable {
 }

 /**
- * Create an input stream from an queue of RDDs. In each batch,
+ * Create an input stream from a queue of RDDs. In each batch,
  * it will process either one or all of the RDDs returned by the queue.
  *
  * NOTE:

@@ -393,7 +393,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable {
 }

 /**
- * Create an input stream from an queue of RDDs. In each batch,
+ * Create an input stream from a queue of RDDs. In each batch,
  * it will process either one or all of the RDDs returned by the queue.
  *
  * NOTE:
@@ -157,7 +157,7 @@ abstract class DStream[T: ClassTag] (
 def persist(level: StorageLevel): DStream[T] = {
 if (this.isInitialized) {
 throw new UnsupportedOperationException(
-"Cannot change storage level of an DStream after streaming context has started")
+"Cannot change storage level of a DStream after streaming context has started")
 }
 this.storageLevel = level
 this

@@ -176,7 +176,7 @@ abstract class DStream[T: ClassTag] (
 def checkpoint(interval: Duration): DStream[T] = {
 if (isInitialized) {
 throw new UnsupportedOperationException(
-"Cannot change checkpoint interval of an DStream after streaming context has started")
+"Cannot change checkpoint interval of a DStream after streaming context has started")
 }
 persist()
 checkpointDuration = interval

@@ -31,5 +31,5 @@ private[streaming] case class ArrayBufferBlock(arrayBuffer: ArrayBuffer[_]) exte
 /** class representing a block received as an Iterator */
 private[streaming] case class IteratorBlock(iterator: Iterator[_]) extends ReceivedBlock

-/** class representing a block received as an ByteBuffer */
+/** class representing a block received as a ByteBuffer */
 private[streaming] case class ByteBufferBlock(byteBuffer: ByteBuffer) extends ReceivedBlock
@@ -31,7 +31,7 @@ import org.apache.spark.streaming.receiver.Receiver
  * all receivers at the same time. ReceiverTracker will call `scheduleReceivers` at this phase.
  * It will try to schedule receivers such that they are evenly distributed. ReceiverTracker
  * should update its `receiverTrackingInfoMap` according to the results of `scheduleReceivers`.
- * `ReceiverTrackingInfo.scheduledLocations` for each receiver should be set to an location list
+ * `ReceiverTrackingInfo.scheduledLocations` for each receiver should be set to a location list
  * that contains the scheduled locations. Then when a receiver is starting, it will send a
  * register request and `ReceiverTracker.registerReceiver` will be called. In
  * `ReceiverTracker.registerReceiver`, if a receiver's scheduled locations is set, it should

@@ -71,7 +71,7 @@ trait DStreamCheckpointTester { self: SparkFunSuite =>
 /**
  * Tests a streaming operation under checkpointing, by restarting the operation
  * from checkpoint file and verifying whether the final output is correct.
- * The output is assumed to have come from a reliable queue which an replay
+ * The output is assumed to have come from a reliable queue which a replay
  * data as required.
  *
  * NOTE: This takes into consideration that the last batch processed before

@@ -119,7 +119,7 @@ class PIDRateEstimatorSuite extends SparkFunSuite with Matchers {

 test("with no accumulated but some positive error, |I| > 0, follow the processing speed") {
 val p = new PIDRateEstimator(20, 1D, 1D, 0D, 10)
-// prepare a series of batch updates, one every 20ms with an decreasing number of processed
+// prepare a series of batch updates, one every 20ms with a decreasing number of processed
 // elements in each batch, but constant processing time, and no accumulated error. Even though
 // the integral part is non-zero, the estimated rate should follow only the proportional term,
 // asking for less and less elements