2014-06-01 20:27:05 -04:00
|
|
|
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
import com.typesafe.tools.mima.core._
|
|
|
|
|
|
|
|
/**
 * Additional excludes for checking of Spark's binary compatibility.
 *
 * The Mima build will automatically exclude @DeveloperApi and @Experimental classes. This acts
 * as an official audit of cases where we excluded other classes. Please use the narrowest
 * possible exclude here. MIMA will usually tell you what exclude to use, e.g.:
 *
 * {{{
 * ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.take")
 * }}}
 *
 * It is also possible to exclude Spark classes and packages. This should be used sparingly:
 *
 * {{{
 * MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap")
 * }}}
 */
object MimaExcludes {

  /**
   * Returns the audited list of MiMa problem filters for the given version.
   *
   * The version is matched by prefix: strings starting with "1.2", "1.1" or "1.0" each select
   * their own list; any other value yields an empty sequence.
   *
   * @param version version string used to select the exclude list (matched with `startsWith`)
   * @return the problem filters registered for that version, or an empty `Seq` if none match
   */
  def excludes(version: String) =
    version match {
      case v if v.startsWith("1.2") =>
        Seq(
          MimaBuild.excludeSparkPackage("deploy"),
          MimaBuild.excludeSparkPackage("graphx")
        ) ++
        MimaBuild.excludeSparkClass("mllib.linalg.Matrix") ++
        MimaBuild.excludeSparkClass("mllib.linalg.Vector") ++
        Seq(
          ProblemFilters.exclude[IncompatibleTemplateDefProblem](
            "org.apache.spark.scheduler.TaskLocation"),
          // Added normL1 and normL2 to trait MultivariateStatisticalSummary
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.mllib.stat.MultivariateStatisticalSummary.normL1"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.mllib.stat.MultivariateStatisticalSummary.normL2"),
          // MapStatus should be private[spark]
          ProblemFilters.exclude[IncompatibleTemplateDefProblem](
            "org.apache.spark.scheduler.MapStatus"),
          // TaskContext was promoted to Abstract class
          ProblemFilters.exclude[AbstractClassProblem](
            "org.apache.spark.TaskContext")
        ) ++
        Seq(
          // Adding new methods to the JavaRDDLike trait:
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDDLike.takeAsync"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDDLike.foreachPartitionAsync"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDDLike.countAsync"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDDLike.foreachAsync"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDDLike.collectAsync")
        )

      case v if v.startsWith("1.1") =>
        Seq(
          MimaBuild.excludeSparkPackage("deploy"),
          MimaBuild.excludeSparkPackage("graphx")
        ) ++
        Seq(
          // Adding new method to JavaRDDLike trait - we should probably mark this as a
          // developer API.
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDDLike.partitions"),
          // Should probably mark this as Experimental
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDDLike.foreachAsync"),
          // We made a mistake earlier (ed06500d3) in the Java API to use default parameter values
          // for countApproxDistinct* functions, which does not work in Java. We later removed
          // them, and use the following to tell Mima to not care about them.
          // (This filter used to be listed twice in a row; one entry is sufficient.)
          ProblemFilters.exclude[IncompatibleResultTypeProblem](
            "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaPairRDD.countApproxDistinct$default$1"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey$default$1"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDD.countApproxDistinct$default$1"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaRDDLike.countApproxDistinct$default$1"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.storage.DiskStore.getValues"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.storage.MemoryStore.Entry")
        ) ++
        Seq(
          // Serializer interface change. See SPARK-3045.
          ProblemFilters.exclude[IncompatibleTemplateDefProblem](
            "org.apache.spark.serializer.DeserializationStream"),
          ProblemFilters.exclude[IncompatibleTemplateDefProblem](
            "org.apache.spark.serializer.Serializer"),
          ProblemFilters.exclude[IncompatibleTemplateDefProblem](
            "org.apache.spark.serializer.SerializationStream"),
          ProblemFilters.exclude[IncompatibleTemplateDefProblem](
            "org.apache.spark.serializer.SerializerInstance")
        ) ++
        Seq(
          // Renamed putValues -> putArray + putIterator
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.storage.MemoryStore.putValues"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.storage.DiskStore.putValues"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.storage.TachyonStore.putValues")
        ) ++
        Seq(
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.streaming.flume.FlumeReceiver.this"),
          ProblemFilters.exclude[IncompatibleMethTypeProblem](
            "org.apache.spark.streaming.kafka.KafkaUtils.createStream"),
          ProblemFilters.exclude[IncompatibleMethTypeProblem](
            "org.apache.spark.streaming.kafka.KafkaReceiver.this")
        ) ++
        Seq( // Ignore some private methods in ALS.
          // NOTE(review): the "$^dateFeatures" suffix in the two ALS entries below looks like a
          // garbled "$$updateFeatures"; the strings are left unchanged here - confirm against
          // the MiMa report before editing them.
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$^dateFeatures"),
          ProblemFilters.exclude[MissingMethodProblem]( // The only public constructor is the one without arguments.
            "org.apache.spark.mllib.recommendation.ALS.this"),
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$<init>$default$7"),
          ProblemFilters.exclude[IncompatibleMethTypeProblem](
            "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$^dateFeatures")
        ) ++
        MimaBuild.excludeSparkClass("mllib.linalg.distributed.ColumnStatisticsAggregator") ++
        MimaBuild.excludeSparkClass("rdd.ZippedRDD") ++
        MimaBuild.excludeSparkClass("rdd.ZippedPartition") ++
        MimaBuild.excludeSparkClass("util.SerializableHyperLogLog") ++
        MimaBuild.excludeSparkClass("storage.Values") ++
        MimaBuild.excludeSparkClass("storage.Entry") ++
        MimaBuild.excludeSparkClass("storage.MemoryStore$Entry") ++
        // Class was missing "@DeveloperApi" annotation in 1.0.
        MimaBuild.excludeSparkClass("scheduler.SparkListenerApplicationStart") ++
        Seq(
          ProblemFilters.exclude[IncompatibleMethTypeProblem](
            "org.apache.spark.mllib.tree.impurity.Gini.calculate"),
          ProblemFilters.exclude[IncompatibleMethTypeProblem](
            "org.apache.spark.mllib.tree.impurity.Entropy.calculate"),
          ProblemFilters.exclude[IncompatibleMethTypeProblem](
            "org.apache.spark.mllib.tree.impurity.Variance.calculate")
        ) ++
        Seq( // Package-private classes removed in SPARK-2341
          ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.BinaryLabelParser"),
          ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.BinaryLabelParser$"),
          ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.LabelParser"),
          ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.LabelParser$"),
          ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.MulticlassLabelParser"),
          ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.MulticlassLabelParser$")
        ) ++
        Seq( // package-private classes removed in MLlib
          ProblemFilters.exclude[MissingMethodProblem](
            "org.apache.spark.mllib.regression.GeneralizedLinearAlgorithm.org$apache$spark$mllib$regression$GeneralizedLinearAlgorithm$$prependOne")
        ) ++
        Seq( // new Vector methods in MLlib (binary compatible assuming users do not implement Vector)
          ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.copy")
        ) ++
        Seq( // synthetic methods generated in LabeledPoint
          ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.mllib.regression.LabeledPoint$"),
          ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.regression.LabeledPoint.apply"),
          ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LabeledPoint.toString")
        ) ++
        Seq( // Scala 2.11 compatibility fix
          ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.<init>$default$2")
        )

      case v if v.startsWith("1.0") =>
        Seq(
          MimaBuild.excludeSparkPackage("api.java"),
          MimaBuild.excludeSparkPackage("mllib"),
          MimaBuild.excludeSparkPackage("streaming")
        ) ++
        MimaBuild.excludeSparkClass("rdd.ClassTags") ++
        MimaBuild.excludeSparkClass("util.XORShiftRandom") ++
        MimaBuild.excludeSparkClass("graphx.EdgeRDD") ++
        MimaBuild.excludeSparkClass("graphx.VertexRDD") ++
        MimaBuild.excludeSparkClass("graphx.impl.GraphImpl") ++
        MimaBuild.excludeSparkClass("graphx.impl.RoutingTable") ++
        MimaBuild.excludeSparkClass("graphx.util.collection.PrimitiveKeyOpenHashMap") ++
        MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap") ++
        MimaBuild.excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
        MimaBuild.excludeSparkClass("mllib.optimization.SquaredGradient") ++
        MimaBuild.excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
        MimaBuild.excludeSparkClass("mllib.regression.LassoWithSGD") ++
        MimaBuild.excludeSparkClass("mllib.regression.LinearRegressionWithSGD")

      // No audited excludes for any other version.
      case _ => Seq()
    }
}
|