diff --git a/core/src/main/scala/org/apache/spark/SSLOptions.scala b/core/src/main/scala/org/apache/spark/SSLOptions.scala index f86fd20e59..477b01968c 100644 --- a/core/src/main/scala/org/apache/spark/SSLOptions.scala +++ b/core/src/main/scala/org/apache/spark/SSLOptions.scala @@ -94,21 +94,23 @@ private[spark] case class SSLOptions( * are supported by the current Java security provider for this protocol. */ private val supportedAlgorithms: Set[String] = if (enabledAlgorithms.isEmpty) { - Set() + Set.empty } else { var context: SSLContext = null - try { - context = SSLContext.getInstance(protocol.orNull) - /* The set of supported algorithms does not depend upon the keys, trust, or + if (protocol.isEmpty) { + logDebug("No SSL protocol specified") + context = SSLContext.getDefault + } else { + try { + context = SSLContext.getInstance(protocol.get) + /* The set of supported algorithms does not depend upon the keys, trust, or rng, although they will influence which algorithms are eventually used. */ - context.init(null, null, null) - } catch { - case npe: NullPointerException => - logDebug("No SSL protocol specified") - context = SSLContext.getDefault - case nsa: NoSuchAlgorithmException => - logDebug(s"No support for requested SSL protocol ${protocol.get}") - context = SSLContext.getDefault + context.init(null, null, null) + } catch { + case nsa: NoSuchAlgorithmException => + logDebug(s"No support for requested SSL protocol ${protocol.get}") + context = SSLContext.getDefault + } } val providerAlgorithms = context.getServerSocketFactory.getSupportedCipherSuites.toSet diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 3196c1ece1..45ed9860ea 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -420,7 +420,7 @@ object SparkEnv extends Logging { if (!conf.contains("spark.scheduler.mode")) { Seq(("spark.scheduler.mode", schedulingMode)) } else { - Seq[(String, String)]() + Seq.empty[(String, String)] } val sparkProperties = (conf.getAll ++ schedulerMode).sorted diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 3f912dc191..a80016dd22 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -58,8 +58,8 @@ private[spark] object TestUtils { def createJarWithClasses( classNames: Seq[String], toStringValue: String = "", - classNamesWithBase: Seq[(String, String)] = Seq(), - classpathUrls: Seq[URL] = Seq()): URL = { + classNamesWithBase: Seq[(String, String)] = Seq.empty, + classpathUrls: Seq[URL] = Seq.empty): URL = { val tempDir = Utils.createTempDir() val files1 = for (name <- classNames) yield { createCompiledClass(name, tempDir, toStringValue, classpathUrls = classpathUrls) @@ -137,7 +137,7 @@ private[spark] object TestUtils { val options = if (classpathUrls.nonEmpty) { Seq("-classpath", classpathUrls.map { _.getFile }.mkString(File.pathSeparator)) } else { - Seq() + Seq.empty } compiler.getTask(null, null, null, options.asJava, null, Arrays.asList(sourceFile)).call() @@ -160,7 +160,7 @@ private[spark] object TestUtils { destDir: File, toStringValue: String = "", baseClass: String = null, - classpathUrls: Seq[URL] = Seq()): File = { + classpathUrls: Seq[URL] = Seq.empty): File = { val extendsText = Option(baseClass).map { c => s" extends ${c}" }.getOrElse("") val sourceFile = new 
JavaSourceFromString(className, "public class " + className + extendsText + " implements java.io.Serializable {" + diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index fb0405b1a6..6a81752400 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -974,6 +974,7 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial } } } + super.finalize() } } // scalastyle:on no.finalize diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index dad928cdcf..537ab57f96 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -128,8 +128,7 @@ private[spark] object SerDe { } def readBoolean(in: DataInputStream): Boolean = { - val intVal = in.readInt() - if (intVal == 0) false else true + in.readInt() != 0 } def readDate(in: DataInputStream): Date = { diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 6afe58bff5..ccbabf09a8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -337,7 +337,7 @@ class SparkHadoopUtil extends Logging { if (credentials != null) { credentials.getAllTokens.asScala.map(tokenToString) } else { - Seq() + Seq.empty } } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index b2a50bd055..687fd2d3ff 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -317,7 +317,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val newLastScanTime = getNewLastScanTime() logDebug(s"Scanning $logDir with lastScanTime==$lastScanTime") val statusList = Option(fs.listStatus(new Path(logDir))).map(_.toSeq) - .getOrElse(Seq[FileStatus]()) + .getOrElse(Seq.empty[FileStatus]) // scan for modified applications, replay and merge them val logInfos: Seq[FileStatus] = statusList .filter { entry => diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index 8cfd0f6829..e42f41b972 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -55,7 +55,7 @@ class MasterWebUI( } def addProxyTargets(id: String, target: String): Unit = { - var endTarget = target.stripSuffix("/") + val endTarget = target.stripSuffix("/") val handler = createProxyHandler("/proxy/" + id, endTarget) attachHandler(handler) proxyHandlers(id) = handler diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala index cba4aaffe2..12e0dae3f5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala @@ -44,7 +44,7 @@ object CommandUtils extends Logging { memory: Int, sparkHome: String, substituteArguments: String => String, - classPaths: 
Seq[String] = Seq[String](), + classPaths: Seq[String] = Seq.empty, env: Map[String, String] = sys.env): ProcessBuilder = { val localCommand = buildLocalCommand( command, securityMgr, substituteArguments, classPaths, env) @@ -73,7 +73,7 @@ object CommandUtils extends Logging { command: Command, securityMgr: SecurityManager, substituteArguments: String => String, - classPath: Seq[String] = Seq[String](), + classPath: Seq[String] = Seq.empty, env: Map[String, String]): Command = { val libraryPathName = Utils.libraryPathEnvName val libraryPathEntries = command.libraryPathEntries @@ -96,7 +96,7 @@ object CommandUtils extends Logging { command.arguments.map(substituteArguments), newEnvironment, command.classPathEntries ++ classPath, - Seq[String](), // library path already captured in environment variable + Seq.empty, // library path already captured in environment variable // filter out auth secret from java options command.javaOpts.filterNot(_.startsWith("-D" + SecurityManager.SPARK_AUTH_SECRET_CONF))) } diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index 2610d6f6e4..8058a4d5db 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -17,6 +17,8 @@ package org.apache +import java.util.Properties + /** * Core Spark functionality. [[org.apache.spark.SparkContext]] serves as the main entry point to * Spark, while [[org.apache.spark.rdd.RDD]] is the data type representing a distributed collection, @@ -40,9 +42,6 @@ package org.apache * Developer API are intended for advanced users want to extend Spark through lower * level interfaces. These are subject to changes or removal in minor releases. */ - -import java.util.Properties - package object spark { private object SparkBuildInfo { @@ -57,6 +56,9 @@ package object spark { val resourceStream = Thread.currentThread().getContextClassLoader. 
getResourceAsStream("spark-version-info.properties") + if (resourceStream == null) { + throw new SparkException("Could not find spark-version-info.properties") + } try { val unknownProp = "" @@ -71,8 +73,6 @@ package object spark { props.getProperty("date", unknownProp) ) } catch { - case npe: NullPointerException => - throw new SparkException("Error while locating file spark-version-info.properties", npe) case e: Exception => throw new SparkException("Error loading properties from spark-version-info.properties", e) } finally { diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala index 2cba1febe8..10451a324b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala @@ -269,7 +269,7 @@ private class DefaultPartitionCoalescer(val balanceSlack: Double = 0.10) tries = 0 // if we don't have enough partition groups, create duplicates while (numCreated < targetLen) { - var (nxt_replica, nxt_part) = partitionLocs.partsWithLocs(tries) + val (nxt_replica, nxt_part) = partitionLocs.partsWithLocs(tries) tries += 1 val pgroup = new PartitionGroup(Some(nxt_replica)) groupArr += pgroup diff --git a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala index 1181371ab4..f4b0ab1015 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala @@ -97,7 +97,7 @@ private[spark] class Pool( } override def getSortedTaskSetQueue: ArrayBuffer[TaskSetManager] = { - var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager] + val sortedTaskSetQueue = new ArrayBuffer[TaskSetManager] val sortedSchedulableQueue = schedulableQueue.asScala.toSeq.sortWith(taskSetSchedulingAlgorithm.comparator) for (schedulable <- sortedSchedulableQueue) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala index 366b92c5f2..836769e172 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala @@ -60,7 +60,7 @@ private[spark] class DirectTaskResult[T]( val numUpdates = in.readInt if (numUpdates == 0) { - accumUpdates = Seq() + accumUpdates = Seq.empty } else { val _accumUpdates = new ArrayBuffer[AccumulatorV2[_, _]] for (i <- 0 until numUpdates) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 3968fb7e63..589fe672ad 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -891,7 +891,7 @@ private[spark] class TaskSetManager( override def removeSchedulable(schedulable: Schedulable) {} override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { - var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager]() + val sortedTaskSetQueue = new ArrayBuffer[TaskSetManager]() sortedTaskSetQueue += this sortedTaskSetQueue } @@ -948,7 +948,7 @@ private[spark] class TaskSetManager( if (tasksSuccessful >= minFinishedForSpeculation && tasksSuccessful > 0) { val time = clock.getTimeMillis() - var medianDuration = successfulTaskDurations.median + val medianDuration = successfulTaskDurations.median val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 
minTimeToSpeculation) // TODO: Threshold should also look at standard deviation of task durations and have a lower // bound based on that. diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 0b396b794d..a46824a0c6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -23,7 +23,6 @@ import javax.annotation.concurrent.GuardedBy import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.concurrent.Future -import scala.concurrent.duration.Duration import org.apache.spark.{ExecutorAllocationClient, SparkEnv, SparkException, TaskState} import org.apache.spark.internal.Logging @@ -427,11 +426,11 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * be called in the yarn-client mode when AM re-registers after a failure. * */ protected def reset(): Unit = { - val executors = synchronized { + val executors: Set[String] = synchronized { requestedTotalExecutors = 0 numPendingExecutors = 0 executorsPendingToRemove.clear() - Set() ++ executorDataMap.keys + executorDataMap.keys.toSet } // Remove all the lingering executors that should be removed but not yet. The reason might be diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index adbe3cfd89..aaacabe79a 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1275,11 +1275,11 @@ private[spark] class BlockManager( val numPeersToReplicateTo = level.replication - 1 val startTime = System.nanoTime - var peersReplicatedTo = mutable.HashSet.empty ++ existingReplicas - var peersFailedToReplicateTo = mutable.HashSet.empty[BlockManagerId] + val peersReplicatedTo = mutable.HashSet.empty ++ existingReplicas + val peersFailedToReplicateTo = mutable.HashSet.empty[BlockManagerId] var numFailures = 0 - val initialPeers = getPeers(false).filterNot(existingReplicas.contains(_)) + val initialPeers = getPeers(false).filterNot(existingReplicas.contains) var peersForReplication = blockReplicationPolicy.prioritize( blockManagerId, diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index cce7a7611b..a7f2caafe0 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -241,7 +241,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { }.getOrElse(jobIdTitle) val jobSortDesc = Option(parameterJobSortDesc).map(_.toBoolean).getOrElse( // New jobs should be shown above old jobs by default. 
- if (jobSortColumn == jobIdTitle) true else false + jobSortColumn == jobIdTitle ) val jobPageSize = Option(parameterJobPageSize).map(_.toInt).getOrElse(100) val jobPrevPageSize = Option(parameterJobPrevPageSize).map(_.toInt).getOrElse(jobPageSize) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala index 2b0816e357..a30c135929 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala @@ -115,7 +115,7 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { if (sc.isDefined && isFairScheduler) {

<h4>{pools.size} Fair Scheduler Pools</h4> ++ poolTable.toNodeSeq } else { - Seq[Node]() + Seq.empty[Node] } } if (shouldShowActiveStages) { diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala index b164f32b62..819fe57e14 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala @@ -41,7 +41,7 @@ private[ui] class PoolPage(parent: StagesTab) extends WebUIPage("pool") { val poolToActiveStages = listener.poolToActiveStages val activeStages = poolToActiveStages.get(poolName) match { case Some(s) => s.values.toSeq - case None => Seq[StageInfo]() + case None => Seq.empty[StageInfo] } val shouldShowActiveStages = activeStages.nonEmpty val activeStagesTable = diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 6b3dadc333..8ed51746ab 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -565,7 +565,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val executorTable = new ExecutorTable(stageId, stageAttemptId, parent) val maybeAccumulableTable: Seq[Node] = - if (hasAccumulators) { <h4>Accumulators</h4> ++ accumulableTable } else Seq() + if (hasAccumulators) { <h4>Accumulators</h4>
++ accumulableTable } else Seq.empty val aggMetrics = logDebug(" " + f) } - logDebug(" + declared methods: " + declaredMethods.size) - declaredMethods.foreach { m => logDebug(" " + m) } - logDebug(" + inner classes: " + innerClasses.size) - innerClasses.foreach { c => logDebug(" " + c.getName) } - logDebug(" + outer classes: " + outerClasses.size) - outerClasses.foreach { c => logDebug(" " + c.getName) } - logDebug(" + outer objects: " + outerObjects.size) - outerObjects.foreach { o => logDebug(" " + o) } + if (log.isDebugEnabled) { + logDebug(" + declared fields: " + declaredFields.size) + declaredFields.foreach { f => logDebug(" " + f) } + logDebug(" + declared methods: " + declaredMethods.size) + declaredMethods.foreach { m => logDebug(" " + m) } + logDebug(" + inner classes: " + innerClasses.size) + innerClasses.foreach { c => logDebug(" " + c.getName) } + logDebug(" + outer classes: " + outerClasses.size) + outerClasses.foreach { c => logDebug(" " + c.getName) } + logDebug(" + outer objects: " + outerObjects.size) + outerObjects.foreach { o => logDebug(" " + o) } + } // Fail fast if we detect return statements in closures getClassReader(func.getClass).accept(new ReturnStatementFinder(), 0) @@ -201,7 +203,7 @@ private[spark] object ClosureCleaner extends Logging { // Initialize accessed fields with the outer classes first // This step is needed to associate the fields to the correct classes later for (cls <- outerClasses) { - accessedFields(cls) = Set[String]() + accessedFields(cls) = Set.empty[String] } // Populate accessed fields by visiting all fields and methods accessed by this and // all of its inner closures. If transitive cleaning is enabled, this may recursively diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index 806d14e7cc..8406826a22 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -696,7 +696,7 @@ private[spark] object JsonProtocol { val accumulatedValues = { Utils.jsonOption(json \ "Accumulables").map(_.extract[List[JValue]]) match { case Some(values) => values.map(accumulableInfoFromJson) - case None => Seq[AccumulableInfo]() + case None => Seq.empty[AccumulableInfo] } } @@ -726,7 +726,7 @@ private[spark] object JsonProtocol { val killed = Utils.jsonOption(json \ "Killed").exists(_.extract[Boolean]) val accumulables = Utils.jsonOption(json \ "Accumulables").map(_.extract[Seq[JValue]]) match { case Some(values) => values.map(accumulableInfoFromJson) - case None => Seq[AccumulableInfo]() + case None => Seq.empty[AccumulableInfo] } val taskInfo = diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 584337a71c..d661293e52 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1443,7 +1443,7 @@ private[spark] object Utils extends Logging { var firstUserFile = "" var firstUserLine = 0 var insideSpark = true - var callStack = new ArrayBuffer[String]() :+ "" + val callStack = new ArrayBuffer[String]() :+ "" Thread.currentThread.getStackTrace().foreach { ste: StackTraceElement => // When running under some profilers, the current stack trace might contain some bogus @@ -2438,7 +2438,7 @@ private[spark] object Utils extends Logging { .getOrElse(UserGroupInformation.getCurrentUser().getShortUserName()) } - val EMPTY_USER_GROUPS = Set[String]() + val 
EMPTY_USER_GROUPS = Set.empty[String] // Returns the groups to which the current user belongs. def getCurrentUserGroups(sparkConf: SparkConf, username: String): Set[String] = { @@ -2587,7 +2587,7 @@ private[spark] object Utils extends Logging { * Unions two comma-separated lists of files and filters out empty strings. */ def unionFileLists(leftList: Option[String], rightList: Option[String]): Set[String] = { - var allFiles = Set[String]() + var allFiles = Set.empty[String] leftList.foreach { value => allFiles ++= value.split(",") } rightList.foreach { value => allFiles ++= value.split(",") } allFiles.filter { _.nonEmpty } diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala index a897cad02f..8dbb7ee4e5 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala @@ -53,16 +53,16 @@ object LocalFileLR { val fileSrc = scala.io.Source.fromFile(args(0)) val lines = fileSrc.getLines().toArray - val points = lines.map(parsePoint _) + val points = lines.map(parsePoint) val ITERATIONS = args(1).toInt // Initialize w to a random value - var w = DenseVector.fill(D) {2 * rand.nextDouble - 1} + val w = DenseVector.fill(D) {2 * rand.nextDouble - 1} println("Initial w: " + w) for (i <- 1 to ITERATIONS) { println("On iteration " + i) - var gradient = DenseVector.zeros[Double](D) + val gradient = DenseVector.zeros[Double](D) for (p <- points) { val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y gradient += p.x * scale diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala index fca585c2a3..963c9a56d6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala @@ -47,12 +47,11 @@ object LocalKMeans { } def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = { - var index = 0 var bestIndex = 0 var closest = Double.PositiveInfinity for (i <- 1 to centers.size) { - val vCurr = centers.get(i).get + val vCurr = centers(i) val tempDist = squaredDistance(p, vCurr) if (tempDist < closest) { closest = tempDist @@ -76,8 +75,8 @@ object LocalKMeans { showWarning() val data = generateData - var points = new HashSet[Vector[Double]] - var kPoints = new HashMap[Int, Vector[Double]] + val points = new HashSet[Vector[Double]] + val kPoints = new HashMap[Int, Vector[Double]] var tempDist = 1.0 while (points.size < K) { @@ -92,11 +91,11 @@ object LocalKMeans { println("Initial centers: " + kPoints) while(tempDist > convergeDist) { - var closest = data.map (p => (closestPoint(p, kPoints), (p, 1))) + val closest = data.map (p => (closestPoint(p, kPoints), (p, 1))) - var mappings = closest.groupBy[Int] (x => x._1) + val mappings = closest.groupBy[Int] (x => x._1) - var pointStats = mappings.map { pair => + val pointStats = mappings.map { pair => pair._2.reduceLeft [(Int, (Vector[Double], Int))] { case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2)) } @@ -107,7 +106,7 @@ object LocalKMeans { tempDist = 0.0 for (mapping <- newPoints) { - tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2) + tempDist += squaredDistance(kPoints(mapping._1), mapping._2) } for (newP <- newPoints) { diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala 
b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala index 13ccc2ae7c..eb5221f085 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala @@ -60,12 +60,12 @@ object LocalLR { val data = generateData // Initialize w to a random value - var w = DenseVector.fill(D) {2 * rand.nextDouble - 1} + val w = DenseVector.fill(D) {2 * rand.nextDouble - 1} println("Initial w: " + w) for (i <- 1 to ITERATIONS) { println("On iteration " + i) - var gradient = DenseVector.zeros[Double](D) + val gradient = DenseVector.zeros[Double](D) for (p <- data) { val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y gradient += p.x * scale diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala index 05ac6cbcb3..9d675bbc18 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala @@ -40,8 +40,8 @@ object SparkHdfsLR { def parsePoint(line: String): DataPoint = { val tok = new java.util.StringTokenizer(line, " ") - var y = tok.nextToken.toDouble - var x = new Array[Double](D) + val y = tok.nextToken.toDouble + val x = new Array[Double](D) var i = 0 while (i < D) { x(i) = tok.nextToken.toDouble; i += 1 @@ -78,7 +78,7 @@ object SparkHdfsLR { val ITERATIONS = args(1).toInt // Initialize w to a random value - var w = DenseVector.fill(D) {2 * rand.nextDouble - 1} + val w = DenseVector.fill(D) {2 * rand.nextDouble - 1} println("Initial w: " + w) for (i <- 1 to ITERATIONS) { diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala index cb2be091ff..c18e3d31f1 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala @@ -72,7 +72,7 @@ object SparkLR { val points = spark.sparkContext.parallelize(generateData, numSlices).cache() // Initialize w to a random value - var w = DenseVector.fill(D) {2 * rand.nextDouble - 1} + val w = DenseVector.fill(D) {2 * rand.nextDouble - 1} println("Initial w: " + w) for (i <- 1 to ITERATIONS) { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index b03701e491..19f2d7751b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -251,7 +251,7 @@ object DecisionTreeExample { .setMinInfoGain(params.minInfoGain) .setCacheNodeIds(params.cacheNodeIds) .setCheckpointInterval(params.checkpointInterval) - case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.") + case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } stages += dt val pipeline = new Pipeline().setStages(stages.toArray) @@ -278,7 +278,7 @@ object DecisionTreeExample { } else { println(treeModel) // Print model summary. } - case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.") + case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } // Evaluate model on training, test data. 
@@ -294,7 +294,7 @@ object DecisionTreeExample { println("Test data results:") evaluateRegressionModel(pipelineModel, test, labelColName) case _ => - throw new IllegalArgumentException("Algo ${params.algo} not supported.") + throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } spark.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala index 3bd8ff54c2..8f3ce4b315 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala @@ -190,7 +190,7 @@ object GBTExample { .setCacheNodeIds(params.cacheNodeIds) .setCheckpointInterval(params.checkpointInterval) .setMaxIter(params.maxIter) - case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.") + case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } stages += dt val pipeline = new Pipeline().setStages(stages.toArray) @@ -217,7 +217,7 @@ object GBTExample { } else { println(rfModel) // Print model summary. } - case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.") + case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } // Evaluate model on training, test data. @@ -233,7 +233,7 @@ object GBTExample { println("Test data results:") DecisionTreeExample.evaluateRegressionModel(pipelineModel, test, labelColName) case _ => - throw new IllegalArgumentException("Algo ${params.algo} not supported.") + throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } spark.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala index a735c218c0..3c127a46e1 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala @@ -198,7 +198,7 @@ object RandomForestExample { .setCheckpointInterval(params.checkpointInterval) .setFeatureSubsetStrategy(params.featureSubsetStrategy) .setNumTrees(params.numTrees) - case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.") + case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } stages += dt val pipeline = new Pipeline().setStages(stages.toArray) @@ -225,7 +225,7 @@ object RandomForestExample { } else { println(rfModel) // Print model summary. } - case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.") + case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } // Evaluate model on training, test data. 
@@ -241,7 +241,7 @@ object RandomForestExample { println("Test data results:") DecisionTreeExample.evaluateRegressionModel(pipelineModel, test, labelColName) case _ => - throw new IllegalArgumentException("Algo ${params.algo} not supported.") + throw new IllegalArgumentException(s"Algo ${params.algo} not supported.") } spark.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index 0ad0465a02..fa47e12857 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -211,7 +211,7 @@ object DecisionTreeRunner { case Regression => (origExamples, null, 0) case _ => - throw new IllegalArgumentException("Algo ${params.algo} not supported.") + throw new IllegalArgumentException(s"Algo $algo not supported.") } // Create training, test sets. diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index f86b8f586d..5915d9f99a 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -378,7 +378,7 @@ class KafkaTestUtils(withBrokerProps: Map[String, Object] = Map.empty) extends L zkUtils.getLeaderForPartition(topic, partition).isDefined && Request.isValidBrokerId(leaderAndInSyncReplicas.leader) && - leaderAndInSyncReplicas.isr.size >= 1 + leaderAndInSyncReplicas.isr.nonEmpty case _ => false diff --git a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala index 62cdf5b113..d9fc9cc206 100644 --- a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala +++ b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala @@ -156,7 +156,7 @@ private[spark] class KafkaRDD[K, V]( val prefExecs = if (null == prefHost) allExecs else allExecs.filter(_.host == prefHost) val execs = if (prefExecs.isEmpty) allExecs else prefExecs if (execs.isEmpty) { - Seq() + Seq.empty } else { // execs is sorted, tp.hashCode depends only on topic and partition, so consistent index val index = Math.floorMod(tp.hashCode, execs.length) diff --git a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala index 8273c2b49f..6c7024ea4b 100644 --- a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala @@ -257,7 +257,7 @@ private[kafka010] class KafkaTestUtils extends Logging { zkUtils.getLeaderForPartition(topic, partition).isDefined && Request.isValidBrokerId(leaderAndInSyncReplicas.leader) && - leaderAndInSyncReplicas.isr.size >= 1 + leaderAndInSyncReplicas.isr.nonEmpty case _ => false diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala index 26349f4d88..0e6a340a68 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala +++ 
b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala @@ -388,7 +388,7 @@ class EdgePartition[ val aggregates = new Array[A](vertexAttrs.length) val bitset = new BitSet(vertexAttrs.length) - var ctx = new AggregatingEdgeContext[VD, ED, A](mergeMsg, aggregates, bitset) + val ctx = new AggregatingEdgeContext[VD, ED, A](mergeMsg, aggregates, bitset) var i = 0 while (i < size) { val localSrcId = localSrcIds(i) @@ -433,7 +433,7 @@ class EdgePartition[ val aggregates = new Array[A](vertexAttrs.length) val bitset = new BitSet(vertexAttrs.length) - var ctx = new AggregatingEdgeContext[VD, ED, A](mergeMsg, aggregates, bitset) + val ctx = new AggregatingEdgeContext[VD, ED, A](mergeMsg, aggregates, bitset) index.iterator.foreach { cluster => val clusterSrcId = cluster._1 val clusterPos = cluster._2 diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala index 5d2a53782b..34e1253ff4 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -74,7 +74,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( override def getCheckpointFiles: Seq[String] = { Seq(vertices.getCheckpointFile, replicatedVertexView.edges.getCheckpointFile).flatMap { case Some(path) => Seq(path) - case None => Seq() + case None => Seq.empty } } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala index 2b3e5f98c4..419731146d 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala @@ -18,6 +18,7 @@ package org.apache.spark.graphx.util import scala.annotation.tailrec +import scala.collection.mutable import scala.reflect.ClassTag import scala.util._ @@ -133,7 +134,7 @@ object GraphGenerators extends Logging { throw new IllegalArgumentException( s"numEdges must be <= $numEdgesUpperBound but was $numEdges") } - var edges: Set[Edge[Int]] = Set() + var edges = mutable.Set.empty[Edge[Int]] while (edges.size < numEdges) { if (edges.size % 100 == 0) { logDebug(edges.size + " edges") diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index b234bc4c2d..65b09e5712 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -736,7 +736,7 @@ class LogisticRegression @Since("1.2.0") ( b_k' = b_k - \mean(b_k) }}} */ - val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing + val rawIntercepts = histogram.map(math.log1p) // add 1 for smoothing (log1p(x) = log(1+x)) val rawMean = rawIntercepts.sum / rawIntercepts.length rawIntercepts.indices.foreach { i => initialCoefWithInterceptMatrix.update(i, numFeatures, rawIntercepts(i) - rawMean) @@ -820,7 +820,7 @@ class LogisticRegression @Since("1.2.0") ( val interceptVec = if ($(fitIntercept) || !isMultinomial) { Vectors.zeros(numCoefficientSets) } else { - Vectors.sparse(numCoefficientSets, Seq()) + Vectors.sparse(numCoefficientSets, Seq.empty) } // separate intercepts and coefficients from the combined matrix allCoefMatrix.foreachActive { (classIndex, featureIndex, value) => diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala index 2dd565a782..32835fb3aa 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala @@ -99,7 +99,7 @@ private[ml] case class ParsedRFormula(label: ColumnRef, terms: Seq[Term]) { }).map(_.distinct) // Deduplicates feature interactions, for example, a:b is the same as b:a. - var seen = mutable.Set[Set[String]]() + val seen = mutable.Set[Set[String]]() validInteractions.flatMap { case t if seen.contains(t.toSet) => None diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 91cd229704..ccc61feee8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -286,7 +286,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String s"training is not needed.") } if (handlePersistence) instances.unpersist() - val coefficients = Vectors.sparse(numFeatures, Seq()) + val coefficients = Vectors.sparse(numFeatures, Seq.empty) val intercept = yMean val model = copyValues(new LinearRegressionModel(uid, coefficients, intercept)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 98e50c5b45..49043b5acb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -363,7 +363,7 @@ class KMeans private ( // to their squared distance from the centers. Note that only distances between points // and new centers are computed in each iteration. 
var step = 0 - var bcNewCentersList = ArrayBuffer[Broadcast[_]]() + val bcNewCentersList = ArrayBuffer[Broadcast[_]]() while (step < initializationSteps) { val bcNewCenters = data.context.broadcast(newCenters) bcNewCentersList += bcNewCenters diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index 7695aabf43..c7c1a5404e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -78,13 +78,13 @@ private[mllib] object EigenValueDecomposition { require(n * ncv.toLong <= Integer.MAX_VALUE && ncv * (ncv.toLong + 8) <= Integer.MAX_VALUE, s"k = $k and/or n = $n are too large to compute an eigendecomposition") - var ido = new intW(0) - var info = new intW(0) - var resid = new Array[Double](n) - var v = new Array[Double](n * ncv) - var workd = new Array[Double](n * 3) - var workl = new Array[Double](ncv * (ncv + 8)) - var ipntr = new Array[Int](11) + val ido = new intW(0) + val info = new intW(0) + val resid = new Array[Double](n) + val v = new Array[Double](n * ncv) + val workd = new Array[Double](n * 3) + val workl = new Array[Double](ncv * (ncv + 8)) + val ipntr = new Array[Int](11) // call ARPACK's reverse communication, first iteration with ido = 0 arpack.dsaupd(ido, bmat, n, which, nev.`val`, tolW, resid, ncv, v, n, iparam, ipntr, workd, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala index efedebe301..21ec287e49 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala @@ -257,7 +257,7 @@ object LBFGS extends Logging { (denseGrad1, loss1 + loss2) } - val zeroSparseVector = Vectors.sparse(n, Seq()) + val zeroSparseVector = Vectors.sparse(n, Seq.empty) val (gradientSum, lossSum) = data.treeAggregate((zeroSparseVector, 0.0))(seqOp, combOp) // broadcasted model is not needed anymore diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/SpearmanCorrelation.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/SpearmanCorrelation.scala index b760347bcb..ee51d33239 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/SpearmanCorrelation.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/SpearmanCorrelation.scala @@ -57,7 +57,7 @@ private[stat] object SpearmanCorrelation extends Correlation with Logging { var preCol = -1 var preVal = Double.NaN var startRank = -1.0 - var cachedUids = ArrayBuffer.empty[Long] + val cachedUids = ArrayBuffer.empty[Long] val flush: () => Iterable[(Long, (Int, Double))] = () => { val averageRank = startRank + (cachedUids.size - 1) / 2.0 val output = cachedUids.map { uid => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala index 551ea35795..80c6ef0ea1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala @@ -133,7 +133,7 @@ class StreamingTest @Since("1.6.0") () extends Logging with Serializable { if (time.milliseconds > data.slideDuration.milliseconds * peacePeriod) { rdd } else { - data.context.sparkContext.parallelize(Seq()) 
+ data.context.sparkContext.parallelize(Seq.empty) } } } diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 41f3a0451a..b9db1df2d1 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -589,7 +589,7 @@ object PySparkAssembly { val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip") zipFile.delete() zipRecursive(src, zipFile) - Seq[File]() + Seq.empty[File] }).value ) @@ -810,7 +810,7 @@ object TestSettings { require(d.mkdir() || d.isDirectory(), s"Failed to create directory $d") } } - Seq[File]() + Seq.empty[File] }).value, concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), // Remove certain packages from Scaladoc diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala index a6bb5d5915..022191d007 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala @@ -112,7 +112,7 @@ private[ui] class DriverPage(parent: MesosClusterUI) extends WebUIPage("driver") Last Task Status {state.mesosTaskStatus.map(_.toString).getOrElse("")} - }.getOrElse(Seq[Node]()) + }.getOrElse(Seq.empty[Node]) } private def propertiesRow(properties: collection.Map[String, String]): Seq[Node] = { @@ -175,6 +175,6 @@ private[ui] class DriverPage(parent: MesosClusterUI) extends WebUIPage("driver") {state.retries} - }.getOrElse(Seq[Node]()) + }.getOrElse(Seq.empty[Node]) } } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 062ed1f93f..7ec116c74b 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -333,7 +333,7 @@ trait MesosSchedulerUtils extends Logging { try { splitter.split(constraintsVal).asScala.toMap.mapValues(v => if (v == null || v.isEmpty) { - Set[String]() + Set.empty[String] } else { v.split(',').toSet } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7745709e07..501e7e3c69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2334,8 +2334,9 @@ object TimeWindowing extends Rule[LogicalPlan] { val windowExpressions = p.expressions.flatMap(_.collect { case t: TimeWindow => t }).toSet + val numWindowExpr = windowExpressions.size // Only support a single window expression for now - if (windowExpressions.size == 1 && + if (numWindowExpr == 1 && windowExpressions.head.timeColumn.resolved && windowExpressions.head.checkInputDataTypes().isSuccess) { @@ -2402,7 +2403,7 @@ object TimeWindowing extends Rule[LogicalPlan] { renamedPlan.withNewChildren(substitutedPlan :: Nil) } - } else if (windowExpressions.size > 1) { + } else if (numWindowExpr > 1) { p.failAnalysis("Multiple time window expressions would result in a cartesian product " + "of rows, therefore they are currently not supported.") } else { diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index c863ba4341..83a23cc97e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -228,10 +228,10 @@ case class ArrayContains(left: Expression, right: Expression) override def dataType: DataType = BooleanType override def inputTypes: Seq[AbstractDataType] = right.dataType match { - case NullType => Seq() + case NullType => Seq.empty case _ => left.dataType match { case n @ ArrayType(element, _) => Seq(n, element) - case _ => Seq() + case _ => Seq.empty } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index db7baf6e9b..064ca68b7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -150,7 +150,7 @@ object JoinReorderDP extends PredicateHelper with Logging { // Create the initial plans: each plan is a single item with zero cost. val itemIndex = items.zipWithIndex val foundPlans = mutable.Buffer[JoinPlanMap](itemIndex.map { - case (item, id) => Set(id) -> JoinPlan(Set(id), item, Set(), Cost(0, 0)) + case (item, id) => Set(id) -> JoinPlan(Set(id), item, Set.empty, Cost(0, 0)) }.toMap) // Build filters from the join graph to be used by the search algorithm. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 6c83f47900..79a6c8663a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -134,7 +134,7 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { private def collectGroupingExpressions(plan: LogicalPlan): ExpressionSet = plan match { case Aggregate(groupingExpressions, aggregateExpressions, child) => ExpressionSet.apply(groupingExpressions) - case _ => ExpressionSet(Seq()) + case _ => ExpressionSet(Seq.empty) } def apply(plan: LogicalPlan): LogicalPlan = plan transform { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ad359e714b..45c1d3d430 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -877,7 +877,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging // Reverse the contexts to have them in the same sequence as in the SQL statement & turn them // into expressions. - val expressions = contexts.reverse.map(expression) + val expressions = contexts.reverseMap(expression) // Create a balanced tree. 
def reduceToExpressionTree(low: Int, high: Int): Expression = high - low match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index 7f370fb731..8d034c21a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -173,7 +173,7 @@ object ExtractFiltersAndInnerJoins extends PredicateHelper { val (plans, conditions) = flattenJoin(j) (plans, conditions ++ splitConjunctivePredicates(filterCondition)) - case _ => (Seq((plan, parentJoinType)), Seq()) + case _ => (Seq((plan, parentJoinType)), Seq.empty) } def unapply(plan: LogicalPlan): Option[(Seq[(LogicalPlan, InnerLike)], Seq[Expression])] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala index e13db85c7a..74820eb97d 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala @@ -47,7 +47,7 @@ case class FilterEstimation(plan: Filter) extends Logging { // Estimate selectivity of this filter predicate, and update column stats if needed. // For not-supported condition, set filter selectivity to a conservative estimate 100% - val filterSelectivity = calculateFilterSelectivity(plan.condition).getOrElse(BigDecimal(1.0)) + val filterSelectivity = calculateFilterSelectivity(plan.condition).getOrElse(BigDecimal(1)) val filteredRowCount: BigInt = ceil(BigDecimal(childStats.rowCount.get) * filterSelectivity) val newColStats = if (filteredRowCount == 0) { @@ -83,13 +83,13 @@ case class FilterEstimation(plan: Filter) extends Logging { : Option[BigDecimal] = { condition match { case And(cond1, cond2) => - val percent1 = calculateFilterSelectivity(cond1, update).getOrElse(BigDecimal(1.0)) - val percent2 = calculateFilterSelectivity(cond2, update).getOrElse(BigDecimal(1.0)) + val percent1 = calculateFilterSelectivity(cond1, update).getOrElse(BigDecimal(1)) + val percent2 = calculateFilterSelectivity(cond2, update).getOrElse(BigDecimal(1)) Some(percent1 * percent2) case Or(cond1, cond2) => - val percent1 = calculateFilterSelectivity(cond1, update = false).getOrElse(BigDecimal(1.0)) - val percent2 = calculateFilterSelectivity(cond2, update = false).getOrElse(BigDecimal(1.0)) + val percent1 = calculateFilterSelectivity(cond1, update = false).getOrElse(BigDecimal(1)) + val percent2 = calculateFilterSelectivity(cond2, update = false).getOrElse(BigDecimal(1)) Some(percent1 + percent2 - (percent1 * percent2)) // Not-operator pushdown @@ -464,7 +464,7 @@ case class FilterEstimation(plan: Filter) extends Logging { (numericLiteral > max, numericLiteral <= min) } - var percent = BigDecimal(1.0) + var percent = BigDecimal(1) if (noOverlap) { percent = 0.0 } else if (completeOverlap) { @@ -630,7 +630,7 @@ case class FilterEstimation(plan: Filter) extends Logging { ) } - var percent = BigDecimal(1.0) + var percent = BigDecimal(1) if (noOverlap) { percent = 0.0 } else if (completeOverlap) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index f4d5a4471d..9ee777529a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -609,9 +609,9 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(BRound(floatPi, scale), floatResults(i), EmptyRow) } - val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1), BigDecimal(3.14), - BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159), - BigDecimal(3.141593), BigDecimal(3.1415927)) + val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3), BigDecimal("3.1"), BigDecimal("3.14"), + BigDecimal("3.142"), BigDecimal("3.1416"), BigDecimal("3.14159"), + BigDecimal("3.141593"), BigDecimal("3.1415927")) (0 to 7).foreach { i => checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala index 144f3d688d..3193d1320a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala @@ -109,8 +109,8 @@ class DecimalSuite extends SparkFunSuite with PrivateMethodTester { test("small decimals represented as unscaled long") { checkCompact(new Decimal(), true) - checkCompact(Decimal(BigDecimal(10.03)), false) - checkCompact(Decimal(BigDecimal(1e20)), false) + checkCompact(Decimal(BigDecimal("10.03")), false) + checkCompact(Decimal(BigDecimal("100000000000000000000")), false) checkCompact(Decimal(17L), true) checkCompact(Decimal(17), true) checkCompact(Decimal(17L, 2, 1), true) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index ba7ca84f22..dae160f1bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -671,11 +671,11 @@ case class AlterTableRecoverPartitionsCommand( } else { logWarning( s"expected partition column ${partitionNames.head}, but got ${ps(0)}, ignoring it") - Seq() + Seq.empty } } else { logWarning(s"ignore ${new Path(path, name)}") - Seq() + Seq.empty } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala index a521fd1323..658d13768a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala @@ -23,7 +23,6 @@ import org.apache.spark.internal.Logging import org.apache.spark.Partition import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession, SQLContext} -import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.jdbc.JdbcDialects import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType @@ -81,7 +80,7 @@ private[sql] object JDBCRelation extends Logging { val column = partitioning.column var i: Int = 0 var currentValue: Long = lowerBound - var ans = new ArrayBuffer[Partition]() + val ans = new ArrayBuffer[Partition]() while (i < 
numPartitions) { val lBound = if (i != 0) s"$column >= $currentValue" else null currentValue += stride diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 87fbf8b1bc..64eea26a9f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -220,7 +220,7 @@ class ParquetFileFormat val needMerged: Seq[FileStatus] = if (mergeRespectSummaries) { - Seq() + Seq.empty } else { filesByType.data } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala index 34391818f3..9e4e02b99b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala @@ -35,8 +35,6 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalGroupState import org.apache.spark.sql.execution.streaming.GroupStateImpl import org.apache.spark.sql.streaming.GroupStateTimeout import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils - /** * Physical version of `ObjectProducer`. @@ -403,8 +401,7 @@ case class FlatMapGroupsInRExec( Seq(groupingAttributes.map(SortOrder(_, Ascending))) override protected def doExecute(): RDD[InternalRow] = { - val isSerializedRData = - if (outputSchema == SERIALIZED_R_DATA_SCHEMA) true else false + val isSerializedRData = outputSchema == SERIALIZED_R_DATA_SCHEMA val serializerForR = if (!isSerializedRData) { SerializationFormats.ROW } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala index d2178e971e..b9835c7dbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala @@ -34,8 +34,7 @@ case class MapPartitionsRWrapper( outputSchema: StructType) extends (Iterator[Any] => Iterator[Any]) { def apply(iter: Iterator[Any]): Iterator[Any] = { // If the content of current DataFrame is serialized R data? - val isSerializedRData = - if (inputSchema == SERIALIZED_R_DATA_SCHEMA) true else false + val isSerializedRData = inputSchema == SERIALIZED_R_DATA_SCHEMA val (newIter, deserializer, colNames) = if (!isSerializedRData) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala index 408c8f81f1..e37033b19a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala @@ -170,12 +170,8 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : ClassTag]( private def compact(batchId: Long, logs: Array[T]): Boolean = { val validBatches = getValidBatchesBeforeCompactionBatch(batchId, compactInterval) val allLogs = validBatches.flatMap(batchId => super.get(batchId)).flatten ++ logs - if (super.add(batchId, compactLogs(allLogs).toArray)) { - true - } else { - // Return false as there is another writer. 
- false - } + // Return false as there is another writer. + super.add(batchId, compactLogs(allLogs).toArray) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 5ee596e06d..5711262654 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -609,7 +609,7 @@ class StreamExecution( } // A list of attributes that will need to be updated. - var replacements = new ArrayBuffer[(Attribute, Attribute)] + val replacements = new ArrayBuffer[(Attribute, Attribute)] // Replace sources in the logical plan with data that has arrived since the last batch. val withNewSources = logicalPlan transform { case StreamingExecutionRelation(source, output) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 4568b67024..d802557b36 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -65,9 +65,9 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { checkAnswer( decimalData.groupBy("a").agg(sum("b")), - Seq(Row(new java.math.BigDecimal(1.0), new java.math.BigDecimal(3.0)), - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(3.0)), - Row(new java.math.BigDecimal(3.0), new java.math.BigDecimal(3.0))) + Seq(Row(new java.math.BigDecimal(1), new java.math.BigDecimal(3)), + Row(new java.math.BigDecimal(2), new java.math.BigDecimal(3)), + Row(new java.math.BigDecimal(3), new java.math.BigDecimal(3))) ) val decimalDataWithNulls = spark.sparkContext.parallelize( @@ -80,10 +80,10 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { DecimalData(null, 2) :: Nil).toDF() checkAnswer( decimalDataWithNulls.groupBy("a").agg(sum("b")), - Seq(Row(new java.math.BigDecimal(1.0), new java.math.BigDecimal(1.0)), - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(1.0)), - Row(new java.math.BigDecimal(3.0), new java.math.BigDecimal(3.0)), - Row(null, new java.math.BigDecimal(2.0))) + Seq(Row(new java.math.BigDecimal(1), new java.math.BigDecimal(1)), + Row(new java.math.BigDecimal(2), new java.math.BigDecimal(1)), + Row(new java.math.BigDecimal(3), new java.math.BigDecimal(3)), + Row(null, new java.math.BigDecimal(2))) ) } @@ -259,19 +259,19 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { checkAnswer( decimalData.agg(avg('a)), - Row(new java.math.BigDecimal(2.0))) + Row(new java.math.BigDecimal(2))) checkAnswer( decimalData.agg(avg('a), sumDistinct('a)), // non-partial - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) + Row(new java.math.BigDecimal(2), new java.math.BigDecimal(6)) :: Nil) checkAnswer( decimalData.agg(avg('a cast DecimalType(10, 2))), - Row(new java.math.BigDecimal(2.0))) + Row(new java.math.BigDecimal(2))) // non-partial checkAnswer( decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))), - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) + Row(new java.math.BigDecimal(2), new java.math.BigDecimal(6)) :: Nil) } test("null average") { @@ -520,9 +520,9 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { test("SQL 
decimal test (used for catching certain decimal handling bugs in aggregates)") { checkAnswer( decimalData.groupBy('a cast DecimalType(10, 2)).agg(avg('b cast DecimalType(10, 2))), - Seq(Row(new java.math.BigDecimal(1.0), new java.math.BigDecimal(1.5)), - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(1.5)), - Row(new java.math.BigDecimal(3.0), new java.math.BigDecimal(1.5)))) + Seq(Row(new java.math.BigDecimal(1), new java.math.BigDecimal("1.5")), + Row(new java.math.BigDecimal(2), new java.math.BigDecimal("1.5")), + Row(new java.math.BigDecimal(3), new java.math.BigDecimal("1.5")))) } test("SPARK-17616: distinct aggregate combined with a non-partial aggregate") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 3f3a6221d2..7c500728bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1167,7 +1167,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { } test("SPARK-6899: type should match when using codegen") { - checkAnswer(decimalData.agg(avg('a)), Row(new java.math.BigDecimal(2.0))) + checkAnswer(decimalData.agg(avg('a)), Row(new java.math.BigDecimal(2))) } test("SPARK-7133: Implement struct, array, and map field accessor") { @@ -1971,7 +1971,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { test("SPARK-19691 Calculating percentile of decimal column fails with ClassCastException") { val df = spark.range(1).selectExpr("CAST(id as DECIMAL) as x").selectExpr("percentile(x, 0.5)") - checkAnswer(df, Row(BigDecimal(0.0)) :: Nil) + checkAnswer(df, Row(BigDecimal(0)) :: Nil) } test("SPARK-19893: cannot run set operations with map type") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index c78ec6d9a8..e95f6dba46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1546,10 +1546,10 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Seq(Row(d))) checkAnswer( df.selectExpr("b * a + b"), - Seq(Row(BigDecimal(2.12321)))) + Seq(Row(BigDecimal("2.12321")))) checkAnswer( df.selectExpr("b * a - b"), - Seq(Row(BigDecimal(0.12321)))) + Seq(Row(BigDecimal("0.12321")))) checkAnswer( df.selectExpr("b * a * b"), Seq(Row(d))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index bcc2351049..a12efc8356 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -387,7 +387,7 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { Row("6.4817")) checkAnswer( - df.select(format_number(lit(BigDecimal(7.128381)), 4)), // not convert anything + df.select(format_number(lit(BigDecimal("7.128381")), 4)), // not convert anything Row("7.1284")) intercept[AnalysisException] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 1cde137edb..80e5dd161d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -826,7 +826,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { StructField("b", DecimalType(2, 2), true):: Nil) assert(expectedSchema === jsonDF.schema) - checkAnswer(jsonDF, Row(1.0E-39D, BigDecimal(0.01))) + checkAnswer(jsonDF, Row(1.0E-39D, BigDecimal("0.01"))) val mergedJsonDF = spark.read .option("prefersDecimal", "true") @@ -839,7 +839,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { assert(expectedMergedSchema === mergedJsonDF.schema) checkAnswer( mergedJsonDF, - Row(1.0E-39D, BigDecimal(0.01)) :: + Row(1.0E-39D, BigDecimal("0.01")) :: Row(1.0E38D, BigDecimal("92233720368547758070")) :: Nil ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index b4f3de9961..84b34d5ad2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -676,7 +676,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha 1.5.toFloat, 4.5, new java.math.BigDecimal(new BigInteger("212500"), 5), - new java.math.BigDecimal(2.125), + new java.math.BigDecimal("2.125"), java.sql.Date.valueOf("2015-05-23"), new Timestamp(0), "This is a string, /[]?=:", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 2a522a1431..be6339f7dd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -245,7 +245,7 @@ private[spark] object HiveUtils extends Logging { val loader = new IsolatedClientLoader( version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion), sparkConf = conf, - execJars = Seq(), + execJars = Seq.empty, hadoopConf = hadoopConf, config = newTemporaryConfiguration(useInMemoryDerby = true), isolationOn = false, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index 16c1103dd1..f238b9a4f7 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -162,8 +162,8 @@ class HadoopTableReader( if (!sparkSession.sessionState.conf.verifyPartitionPath) { partitionToDeserializer } else { - var existPathSet = collection.mutable.Set[String]() - var pathPatternSet = collection.mutable.Set[String]() + val existPathSet = collection.mutable.Set[String]() + val pathPatternSet = collection.mutable.Set[String]() partitionToDeserializer.filter { case (partition, partDeserializer) => def updateExistPathSetByPathPattern(pathPatternStr: String) { @@ -181,8 +181,8 @@ class HadoopTableReader( } val partPath = partition.getDataLocation - val partNum = Utilities.getPartitionDesc(partition).getPartSpec.size(); - var pathPatternStr = getPathPatternByPath(partNum, partPath) + val partNum = Utilities.getPartitionDesc(partition).getPartSpec.size() + val pathPatternStr = getPathPatternByPath(partNum, partPath) if (!pathPatternSet.contains(pathPatternStr)) { pathPatternSet += pathPatternStr 
updateExistPathSetByPathPattern(pathPatternStr) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index be024adac8..bde9a81c65 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -391,7 +391,7 @@ private[hive] class HiveClientImpl( val sortColumnNames = if (allAscendingSorted) { sortColumnOrders.map(_.getCol) } else { - Seq() + Seq.empty } Option(BucketSpec(h.getNumBuckets, h.getBucketCols.asScala, sortColumnNames)) } else { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala index 3de1f4aeb7..11fd8c56e6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala @@ -90,7 +90,7 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors { Literal(0.asInstanceOf[Double]) :: Literal("0") :: Literal(java.sql.Date.valueOf("2014-09-23")) :: - Literal(Decimal(BigDecimal(123.123))) :: + Literal(Decimal(BigDecimal("123.123"))) :: Literal(new java.sql.Timestamp(123123)) :: Literal(Array[Byte](1, 2, 3)) :: Literal.create(Seq[Int](1, 2, 3), ArrayType(IntegerType)) :: diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala index 6a2c23a015..3eedcf7e08 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -21,12 +21,9 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf import org.scalatest.BeforeAndAfterAll -import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, EmptyRow, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, In, InSet, LessThan, LessThanOrEqual, Like, Literal, Or} +import org.apache.spark.sql.catalyst.expressions.{EmptyRow, Expression, In, InSet} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.types.{ByteType, IntegerType, StringType} // TODO: Refactor this to `HivePartitionFilteringSuite` class HiveClientSuite(version: String) @@ -146,7 +143,7 @@ class HiveClientSuite(version: String) 0 to 23, "aa" :: "ab" :: "ba" :: "bb" :: Nil, { case expr @ In(v, list) if expr.inSetConvertible => - InSet(v, Set() ++ list.map(_.eval(EmptyRow))) + InSet(v, list.map(_.eval(EmptyRow)).toSet) }) } @@ -165,7 +162,7 @@ class HiveClientSuite(version: String) 0 to 23, "ab" :: "ba" :: Nil, { case expr @ In(v, list) if expr.inSetConvertible => - InSet(v, Set() ++ list.map(_.eval(EmptyRow))) + InSet(v, list.map(_.eval(EmptyRow)).toSet) }) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index a34f6c73fe..f3b4ff2d1d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -458,7 +458,7 @@ class StreamingContext private[streaming] ( queue: 
Queue[RDD[T]], oneAtATime: Boolean = true ): InputDStream[T] = { - queueStream(queue, oneAtATime, sc.makeRDD(Seq[T](), 1)) + queueStream(queue, oneAtATime, sc.makeRDD(Seq.empty[T], 1)) } /** diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala index a0a40fcee2..4a0ec31b5f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala @@ -153,7 +153,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T def context(): StreamingContext = dstream.context /** Return a new DStream by applying a function to all elements of this DStream. */ - def map[R](f: JFunction[T, R]): JavaDStream[R] = { + def map[U](f: JFunction[T, U]): JavaDStream[U] = { new JavaDStream(dstream.map(f)(fakeClassTag))(fakeClassTag) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala index 5bf1dabf08..d1a5e91793 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala @@ -76,7 +76,7 @@ class StateDStream[K: ClassTag, V: ClassTag, S: ClassTag]( // Re-apply the update function to the old state RDD val updateFuncLocal = updateFunc val finalFunc = (iterator: Iterator[(K, S)]) => { - val i = iterator.map(t => (t._1, Seq[V](), Option(t._2))) + val i = iterator.map(t => (t._1, Seq.empty[V], Option(t._2))) updateFuncLocal(validTime, i) } val stateRDD = prevStateRDD.mapPartitions(finalFunc, preservePartitioning) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala index 408936653c..eb9996ece3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala @@ -63,7 +63,6 @@ object RawTextHelper { var i = 0 var len = 0 - var done = false var value: (String, Long) = null var swap: (String, Long) = null var count = 0
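Note (not part of the patch): most hunks above apply two recurring cleanups — replacing Double-based decimal literals with integer or String constructors, and replacing Seq() with Seq.empty. The standalone sketch below, using only the standard library, illustrates why the rewritten forms are preferred; the object name is invented for the example and does not appear in the patch.

// Hypothetical demo object; not part of the Spark source tree.
object CleanupPatternsDemo {
  def main(args: Array[String]): Unit = {
    // 1. Decimal literals.
    // The java.math.BigDecimal(double) constructor records the exact binary
    // value of the double, which is usually not the decimal written in source:
    println(new java.math.BigDecimal(0.1))
    // -> 0.1000000000000000055511151231257827021181583404541015625

    // The String and integer constructors carry exactly the written value,
    // so test expectations compare against the intended decimal:
    println(new java.math.BigDecimal("0.1"))   // -> 0.1
    println(new java.math.BigDecimal(2))       // -> 2
    println(BigDecimal("3.1415927"))           // -> 3.1415927

    // 2. Empty collections.
    // Seq.empty[Int] states the intent directly and avoids the empty-varargs
    // apply call that Seq[Int]() goes through; both evaluate to the same Nil.
    val a = Seq.empty[Int]
    val b = Seq[Int]()
    println(a == b)                            // -> true
  }
}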