diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 9a767dd739..9b6cbab38c 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -662,7 +662,7 @@ public final class BytesToBytesMap extends MemoryConsumer {
      * It is only valid to call this method immediately after calling `lookup()` using the same key.
      * </p>
      * <p>
-     * The key and value must be word-aligned (that is, their sizes must multiples of 8).
+     * The key and value must be word-aligned (that is, their sizes must be a multiple of 8).
      * </p>
      * <p>
      * After calling this method, calls to `get[Key|Value]Address()` and `get[Key|Value]Length`
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java
index ff0dcc259a..ab800288dc 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java
@@ -51,7 +51,7 @@ final class UnsafeSorterSpillMerger {
     if (spillReader.hasNext()) {
       // We only add the spillReader to the priorityQueue if it is not empty. We do this to
       // make sure the hasNext method of UnsafeSorterIterator returned by getSortedIterator
-      // does not return wrong result because hasNext will returns true
+      // does not return wrong result because hasNext will return true
       // at least priorityQueue.size() times. If we allow n spillReaders in the
       // priorityQueue, we will have n extra empty records in the result of UnsafeSorterIterator.
       spillReader.loadNext();
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 70a8c659bb..4cc0063d01 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -107,7 +107,7 @@ class SparkHadoopUtil extends Logging {
   }

   /**
-   * Return an appropriate (subclass) of Configuration. Creating config can initializes some Hadoop
+   * Return an appropriate (subclass) of Configuration. Creating config can initialize some Hadoop
    * subsystems.
    */
   def newConfiguration(conf: SparkConf): Configuration = {
diff --git a/core/src/test/java/test/org/apache/spark/JavaSparkContextSuite.java b/core/src/test/java/test/org/apache/spark/JavaSparkContextSuite.java
index 7e9cc70d86..0f489fb219 100644
--- a/core/src/test/java/test/org/apache/spark/JavaSparkContextSuite.java
+++ b/core/src/test/java/test/org/apache/spark/JavaSparkContextSuite.java
@@ -30,7 +30,7 @@ import org.apache.spark.api.java.*;
 import org.apache.spark.*;

 /**
- * Java apps can uses both Java-friendly JavaSparkContext and Scala SparkContext.
+ * Java apps can use both Java-friendly JavaSparkContext and Scala SparkContext.
  */
 public class JavaSparkContextSuite implements Serializable {

diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala
index 941f0ab177..65046c175a 100644
--- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala
+++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala
@@ -155,7 +155,7 @@ private[kafka010] case class InternalKafkaConsumer(
     var toFetchOffset = offset
     var consumerRecord: ConsumerRecord[Array[Byte], Array[Byte]] = null
     // We want to break out of the while loop on a successful fetch to avoid using "return"
-    // which may causes a NonLocalReturnControl exception when this method is used as a function.
+    // which may cause a NonLocalReturnControl exception when this method is used as a function.
     var isFetchComplete = false

     while (toFetchOffset != UNKNOWN_OFFSET && !isFetchComplete) {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index af651b056f..408d92ef18 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -1484,7 +1484,7 @@ sealed trait LogisticRegressionSummary extends Serializable {

   /**
    * Convenient method for casting to binary logistic regression summary.
-   * This method will throws an Exception if the summary is not a binary summary.
+   * This method will throw an Exception if the summary is not a binary summary.
    */
   @Since("2.3.0")
   def asBinary: BinaryLogisticRegressionSummary = this match {
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 3cd7a2ef11..214d8fe6bb 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -206,7 +206,7 @@ class DecimalType(FractionalType):
     and scale (the number of digits on the right of dot). For example, (5, 2) can
     support the value from [-999.99 to 999.99].

-    The precision can be up to 38, the scale must less or equal to precision.
+    The precision can be up to 38, the scale must be less than or equal to precision.

     When create a DecimalType, the default precision and scale is (10, 0). When infer
     schema from decimal.Decimal objects, it will be DecimalType(38, 18).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
index 23d146e71e..e511f8064e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
@@ -286,7 +286,7 @@ object DecimalPrecision extends TypeCoercionRule {
     // Consider the following example: multiplying a column which is DECIMAL(38, 18) by 2.
     // If we use the default precision and scale for the integer type, 2 is considered a
     // DECIMAL(10, 0). According to the rules, the result would be DECIMAL(38 + 10 + 1, 18),
-    // which is out of range and therefore it will becomes DECIMAL(38, 7), leading to
+    // which is out of range and therefore it will become DECIMAL(38, 7), leading to
     // potentially loosing 11 digits of the fractional part. Using only the precision needed
     // by the Literal, instead, the result would be DECIMAL(38 + 1 + 1, 18), which would
     // become DECIMAL(38, 16), safely having a much lower precision loss.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallback.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallback.scala
index fb25e781e7..0f6d86691b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallback.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallback.scala
@@ -44,7 +44,7 @@ object CodegenObjectFactoryMode extends Enumeration {

 /**
  * A codegen object generator which creates objects with codegen path first. Once any compile
- * error happens, it can fallbacks to interpreted implementation. In tests, we can use a SQL config
+ * error happens, it can fall back to the interpreted implementation. In tests, we can use a SQL config
  * `SQLConf.CODEGEN_FACTORY_MODE` to control fallback behavior.
  */
 abstract class CodeGeneratorWithInterpretedFallback[IN, OUT] {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala
index 464566b0cb..d8f046c002 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.types.AbstractDataType
  * This trait is typically used by operator expressions (e.g. [[Add]], [[Subtract]]) to define
  * expected input types without any implicit casting.
  *
- * Most function expressions (e.g. [[Substring]] should extends [[ImplicitCastInputTypes]]) instead.
+ * Most function expressions (e.g. [[Substring]]) should extend [[ImplicitCastInputTypes]] instead.
  */
 trait ExpectsInputTypes extends Expression {

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
index 197d7c7668..28a164b5d0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
@@ -766,7 +766,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite {
    *
    * To test this correctly, the given logical plan is wrapped in a fake operator that makes the
    * whole plan look like a streaming plan. Otherwise, a batch plan may throw not supported
-   * exception simply for not being a streaming plan, even though that plan could exists as batch
+   * exception simply for not being a streaming plan, even though that plan could exist as a batch
    * subplan inside some streaming plan.
    */
   def assertSupportedInStreamingPlan(
@@ -793,7 +793,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite {
    *
    * To test this correctly, the given logical plan is wrapped in a fake operator that makes the
    * whole plan look like a streaming plan. Otherwise, a batch plan may throw not supported
-   * exception simply for not being a streaming plan, even though that plan could exists as batch
+   * exception simply for not being a streaming plan, even though that plan could exist as a batch
    * subplan inside some streaming plan.
    */
   def assertNotSupportedInStreamingPlan(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala
index 630113ce2d..dd20e6497f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala
@@ -144,7 +144,7 @@ class EncoderResolutionSuite extends PlanTest {
     // It should pass analysis
     val bound = encoder.resolveAndBind(attrs)

-    // If no null values appear, it should works fine
+    // If no null values appear, it should work fine
     bound.fromRow(InternalRow(new GenericArrayData(Array(1, 2))))

     // If there is null value, it should throw runtime exception
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
index b4f0ae1eb1..98f58a3056 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
@@ -110,7 +110,7 @@ object SQLMetrics {
    * spill size, etc.
    */
   def createSizeMetric(sc: SparkContext, name: String): SQLMetric = {
-    // The final result of this metric in physical operator UI may looks like:
+    // The final result of this metric in physical operator UI may look like:
     // data size total (min, med, max):
     // 100GB (100MB, 1GB, 10GB)
     val acc = new SQLMetric(SIZE_METRIC, -1)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
index 8c016abc5b..103fa7ce90 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
@@ -50,7 +50,7 @@ class FileStreamSource(
   @transient private val fs = new Path(path).getFileSystem(hadoopConf)

   private val qualifiedBasePath: Path = {
-    fs.makeQualified(new Path(path)) // can contains glob patterns
+    fs.makeQualified(new Path(path)) // can contain glob patterns
   }

   private val optionsWithPartitionBasePath = sourceOptions.optionMapWithoutPath ++ {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala
index 1e158323d2..ae1bfa2e49 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala
@@ -312,7 +312,7 @@ trait ProgressReporter extends Logging {
     // DataSourceV2ScanExec records the number of rows it has read using SQLMetrics. However,
     // just collecting all DataSourceV2ScanExec nodes and getting the metric is not correct as
     // a DataSourceV2ScanExec instance may be referred to in the execution plan from two (or
-    // even multiple times) points and considering it twice will leads to double counting. We
+    // even multiple times) points and considering it twice will lead to double counting. We
     // can't dedup them using their hashcode either because two different instances of
     // DataSourceV2ScanExec can have the same hashcode but account for separate sets of
     // records read, and deduping them to consider only one of them would be undercounting the
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
index ac70488feb..2fb8f70a20 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -76,7 +76,7 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with

   /**
    * Disable stdout and stderr when running the test. To not output the logs to the console,
-   * ConsoleAppender's `follow` should be set to `true` so that it will honors reassignments of
+   * ConsoleAppender's `follow` should be set to `true` so that it will honor reassignments of
    * System.out or System.err. Otherwise, ConsoleAppender will still output to the console even if
    * we change System.out and System.err.
    */
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
index 1e801fe184..27d807cc35 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.command.DataWritingCommand
 /**
  * Create table and insert the query result into it.
  *
- * @param tableDesc the Table Describe, which may contains serde, storage handler etc.
+ * @param tableDesc the Table Describe, which may contain serde, storage handler etc.
  * @param query the query whose result will be insert into the new relation
  * @param mode SaveMode
  */
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 741b0124c8..b9c32e789a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -84,7 +84,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
   }

   // Testing the Broadcast based join for cartesian join (cross join)
-  // We assume that the Broadcast Join Threshold will works since the src is a small table
+  // We assume that the Broadcast Join Threshold will work since the src is a small table
   private val spark_10484_1 = """
                                 | SELECT a.key, b.key
                                 | FROM src a LEFT JOIN src b WHERE a.key > b.key + 300