spark-instrumented-optimizer/project/MimaBuild.scala
Ankur Dave 905173df57 Unify GraphImpl RDDs + other graph load optimizations
This PR makes the following changes, primarily in e4fbd329aef85fe2c38b0167255d2a712893d683:

1. *Unify RDDs to avoid zipPartitions.* A graph used to be four RDDs: vertices, edges, routing table, and triplet view. This commit merges them down to two: vertices (with routing table), and edges (with replicated vertices).

2. *Avoid duplicate shuffle in graph building.* We used to do two shuffles when building a graph: one to extract routing information from the edges and move it to the vertices, and another to find nonexistent vertices referred to by edges. With this commit, the latter is done as a side effect of the former.

3. *Avoid no-op shuffle when joins are fully eliminated.* This is a side effect of unifying the edges and the triplet view.

4. *Join elimination for mapTriplets.*

5. *Ship only the needed vertex attributes when upgrading the triplet view.* If the triplet view already contains source attributes, and we now need both attributes, only ship destination attributes rather than re-shipping both. This is done in `ReplicatedVertexView#upgrade`.

Author: Ankur Dave <ankurdave@gmail.com>

Closes #497 from ankurdave/unify-rdds and squashes the following commits:

332ab43 [Ankur Dave] Merge remote-tracking branch 'apache-spark/master' into unify-rdds
4933e2e [Ankur Dave] Exclude RoutingTable from binary compatibility check
5ba8789 [Ankur Dave] Add GraphX upgrade guide from Spark 0.9.1
13ac845 [Ankur Dave] Merge remote-tracking branch 'apache-spark/master' into unify-rdds
a04765c [Ankur Dave] Remove unnecessary toOps call
57202e8 [Ankur Dave] Replace case with pair parameter
75af062 [Ankur Dave] Add explicit return types
04d3ae5 [Ankur Dave] Convert implicit parameter to context bound
c88b269 [Ankur Dave] Revert upgradeIterator to if-in-a-loop
0d3584c [Ankur Dave] EdgePartition.size should be val
2a928b2 [Ankur Dave] Set locality wait
10b3596 [Ankur Dave] Clean up public API
ae36110 [Ankur Dave] Fix style errors
e4fbd32 [Ankur Dave] Unify GraphImpl RDDs + other graph load optimizations
d6d60e2 [Ankur Dave] In GraphLoader, coalesce to minEdgePartitions
62c7b78 [Ankur Dave] In Analytics, take PageRank numIter
d64e8d4 [Ankur Dave] Log current Pregel iteration
2014-05-10 14:48:07 -07:00

97 lines
3.7 KiB
Scala

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact}
import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
import sbt._
object MimaBuild {
def ignoredABIProblems(base: File) = {
import com.typesafe.tools.mima.core._
import com.typesafe.tools.mima.core.ProblemFilters._
// Excludes placed here will be used for all Spark versions
val defaultExcludes = Seq()
// Read package-private excludes from file
val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes")
val excludeFile = file(excludeFilePath)
val packagePrivateList: Seq[String] =
if (!excludeFile.exists()) {
Seq()
} else {
IO.read(excludeFile).split("\n")
}
// Exclude a single class and its corresponding object
def excludeClass(className: String) = {
Seq(
excludePackage(className),
ProblemFilters.exclude[MissingClassProblem](className),
ProblemFilters.exclude[MissingTypesProblem](className),
excludePackage(className + "$"),
ProblemFilters.exclude[MissingClassProblem](className + "$"),
ProblemFilters.exclude[MissingTypesProblem](className + "$")
)
}
// Exclude a Spark class, that is in the package org.apache.spark
def excludeSparkClass(className: String) = {
excludeClass("org.apache.spark." + className)
}
// Exclude a Spark package, that is in the package org.apache.spark
def excludeSparkPackage(packageName: String) = {
excludePackage("org.apache.spark." + packageName)
}
val packagePrivateExcludes = packagePrivateList.flatMap(excludeClass)
/* Excludes specific to a given version of Spark. When comparing the given version against
its immediate predecessor, the excludes listed here will be applied. */
val versionExcludes =
SparkBuild.SPARK_VERSION match {
case v if v.startsWith("1.0") =>
Seq(
excludeSparkPackage("api.java"),
excludeSparkPackage("mllib"),
excludeSparkPackage("streaming")
) ++
excludeSparkClass("rdd.ClassTags") ++
excludeSparkClass("util.XORShiftRandom") ++
excludeSparkClass("graphx.EdgeRDD") ++
excludeSparkClass("graphx.VertexRDD") ++
excludeSparkClass("graphx.impl.GraphImpl") ++
excludeSparkClass("graphx.impl.RoutingTable") ++
excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
excludeSparkClass("mllib.optimization.SquaredGradient") ++
excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
excludeSparkClass("mllib.regression.LassoWithSGD") ++
excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
case _ => Seq()
}
defaultExcludes ++ packagePrivateExcludes ++ versionExcludes
}
def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq(
previousArtifact := None,
binaryIssueFilters ++= ignoredABIProblems(sparkHome)
)
}