/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import com.typesafe.tools.mima.core._
import com.typesafe.tools.mima.core.ProblemFilters._
/**
 * Additional excludes for checking of Spark's binary compatibility.
 *
 * This acts as an official audit of cases where we excluded other classes. Please use the narrowest
 * possible exclude here. MIMA will usually tell you what exclude to use, e.g.:
 *
 * ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.take")
 *
 * It is also possible to exclude Spark classes and packages. This should be used sparingly:
 *
 * MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap")
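 *
 * Whole packages can also be excluded with excludePackage, e.g. (as done for private
 * packages in the 2.0 case below):
 *
 * ProblemFilters.excludePackage("org.apache.spark.rpc")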
 *
 * For a new Spark version, please update MimaBuild.scala to reflect the previous version.
 */
object MimaExcludes {
  def excludes(version: String) = version match {
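    // The filters below cover the 2.0.x line, grouped by the change (JIRA ticket) that made
    // them necessary; MimaBuild.scala merges them into the MiMa check against the previous release.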
    case v if v.startsWith("2.0") =>
      Seq(
        excludePackage("org.apache.spark.rpc"),
        excludePackage("org.spark-project.jetty"),
        excludePackage("org.apache.spark.unused"),
        excludePackage("org.apache.spark.unsafe"),
        excludePackage("org.apache.spark.util.collection.unsafe"),
        excludePackage("org.apache.spark.sql.catalyst"),
        excludePackage("org.apache.spark.sql.execution"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.feature.PCAModel.this"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.status.api.v1.StageData.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.status.api.v1.ApplicationAttemptInfo.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.status.api.v1.ApplicationAttemptInfo.<init>$default$5"),
        // SPARK-14042 Add custom coalescer support
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.rdd.RDD.coalesce"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.rdd.PartitionCoalescer$LocationIterator"),
        ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.rdd.PartitionCoalescer"),
        // SPARK-12600 Remove SQL deprecated methods
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLContext$QueryExecution"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLContext$SparkPlanner"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.applySchema"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.parquetFile"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.jdbc"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.jsonFile"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.jsonRDD"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.load"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.dialectClassName"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.getSQLDialect"),
        // SPARK-13664 Replace HadoopFsRelation with FileFormat
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.ml.source.libsvm.LibSVMRelation"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.HadoopFsRelationProvider"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.HadoopFsRelation$FileStatusCache")
      ) ++ Seq(
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.SparkContext.emptyRDD"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.broadcast.HttpBroadcastFactory"),
        // SPARK-14358 SparkListener from trait to abstract class
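        // (Turning SparkListener into an abstract class changes the parentage of everything that
        // extended the old trait, which is why the listeners below need MissingTypesProblem excludes.)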
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.SparkContext.addSparkListener"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.JavaSparkListener"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.SparkFirehoseListener"),
        ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.scheduler.SparkListener"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ui.jobs.JobProgressListener"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ui.exec.ExecutorsListener"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ui.env.EnvironmentListener"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ui.storage.StorageListener"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.storage.StorageStatusListener")
      ) ++
      Seq(
        // SPARK-3369 Fix Iterable/Iterator in Java API
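        // (In 2.0 the Java flatMap-style functional interfaces return Iterator instead of
        // Iterable, so each interface needs both a result-type and a missing-method exclude
        // for its call method.)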
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.api.java.function.FlatMapFunction.call"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.function.FlatMapFunction.call"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.api.java.function.DoubleFlatMapFunction.call"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.function.DoubleFlatMapFunction.call"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.api.java.function.FlatMapFunction2.call"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.function.FlatMapFunction2.call"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.api.java.function.PairFlatMapFunction.call"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.function.PairFlatMapFunction.call"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.api.java.function.CoGroupFunction.call"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.function.CoGroupFunction.call"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.api.java.function.MapPartitionsFunction.call"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.function.MapPartitionsFunction.call"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.api.java.function.FlatMapGroupsFunction.call"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.function.FlatMapGroupsFunction.call")
      ) ++
      Seq(
        // [SPARK-6429] Implement hashCode and equals together
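        // (Not a typo: MiMa reports mangled Scala names in decoded form, so the synthetic
        // super$equals accessor surfaces with "$eq" printed as "=". The same decoding produces
        // "$@tachSparkUI" ("$at" -> "@") in the SPARK-7889 entry further down.)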
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.Partition.org$apache$spark$Partition$$super=uals")
      ) ++
      Seq(
        // SPARK-4819 replace Guava Optional
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.api.java.JavaSparkContext.getCheckpointDir"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.api.java.JavaSparkContext.getSparkHome"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.getCheckpointFile"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitioner")
      ) ++
      Seq(
        // SPARK-12481 Remove Hadoop 1.x
        ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.mapred.SparkHadoopMapRedUtil"),
        // SPARK-12615 Remove deprecated APIs in core
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.<init>$default$6"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.numericRDDToDoubleRDDFunctions"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.intToIntWritable"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.intWritableConverter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.writableWritableConverter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.rddToPairRDDFunctions"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.rddToAsyncRDDActions"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.boolToBoolWritable"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.longToLongWritable"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.doubleWritableConverter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.rddToOrderedRDDFunctions"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.floatWritableConverter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.booleanWritableConverter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.stringToText"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.doubleRDDToDoubleRDDFunctions"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.doubleToDoubleWritable"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.bytesWritableConverter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.rddToSequenceFileRDDFunctions"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.bytesToBytesWritable"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.longWritableConverter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.stringWritableConverter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.floatToFloatWritable"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.rddToPairRDDFunctions$default$4"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.TaskContext.addOnCompleteCallback"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.TaskContext.runningLocally"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.TaskContext.attemptId"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.defaultMinSplits"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.SparkContext.runJob"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.runJob"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.tachyonFolderName"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.initLocalProperties"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.clearJars"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.clearFiles"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.SparkContext.this"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.flatMapWith$default$2"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.toArray"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.mapWith$default$2"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.mapPartitionsWithSplit"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.flatMapWith"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.filterWith"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.foreachWith"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.mapWith"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.RDD.mapPartitionsWithSplit$default$2"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.rdd.SequenceFileRDDFunctions.this"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.splits"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.toArray"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaSparkContext.defaultMinSplits"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaSparkContext.clearJars"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaSparkContext.clearFiles"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.externalBlockStoreFolderName"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.storage.ExternalBlockStore$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.storage.ExternalBlockManager"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.storage.ExternalBlockStore")
      ) ++ Seq(
        // SPARK-12149 Added new fields to ExecutorSummary
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.status.api.v1.ExecutorSummary.this")
      ) ++
      // SPARK-12665 Remove deprecated and unused classes
      Seq(
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.graphx.GraphKryoRegistrator"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.Vector"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.Vector$Multiplier"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.Vector$")
      ) ++ Seq(
        // SPARK-12591 Register OpenHashMapBasedStateMap for Kryo
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.serializer.KryoInputDataInputBridge"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.serializer.KryoOutputDataOutputBridge")
      ) ++ Seq(
        // SPARK-12510 Refactor ActorReceiver to support Java
        ProblemFilters.exclude[AbstractClassProblem]("org.apache.spark.streaming.receiver.ActorReceiver")
      ) ++ Seq(
        // SPARK-12895 Implement TaskMetrics using accumulators
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.TaskContext.internalMetricsToAccumulators"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.TaskContext.collectInternalAccumulators"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.TaskContext.collectAccumulators")
      ) ++ Seq(
        // SPARK-12896 Send only accumulator updates to driver, not TaskMetrics
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.Accumulable.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.Accumulator.this"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.Accumulator.initialValue")
      ) ++ Seq(
        // SPARK-12692 Scala style: Fix the style violation (Space before "," or ":")
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.SparkSink.org$apache$spark$streaming$flume$sink$Logging$$log_"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.SparkSink.org$apache$spark$streaming$flume$sink$Logging$$log__="),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.SparkAvroCallbackHandler.org$apache$spark$streaming$flume$sink$Logging$$log_"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.SparkAvroCallbackHandler.org$apache$spark$streaming$flume$sink$Logging$$log__="),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.Logging.org$apache$spark$streaming$flume$sink$Logging$$log__="),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.Logging.org$apache$spark$streaming$flume$sink$Logging$$log_"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.Logging.org$apache$spark$streaming$flume$sink$Logging$$_log"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.Logging.org$apache$spark$streaming$flume$sink$Logging$$_log_="),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.TransactionProcessor.org$apache$spark$streaming$flume$sink$Logging$$log_"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.TransactionProcessor.org$apache$spark$streaming$flume$sink$Logging$$log__=")
      ) ++ Seq(
        // SPARK-12689 Migrate DDL parsing to the newly absorbed parser
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.execution.datasources.DDLParser"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.execution.datasources.DDLException"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.ddlParser")
      ) ++ Seq(
        // SPARK-7799 Add "streaming-akka" project
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.zeromq.ZeroMQUtils.createStream"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.streaming.zeromq.ZeroMQUtils.createStream"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.streaming.zeromq.ZeroMQUtils.createStream$default$6"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.zeromq.ZeroMQUtils.createStream$default$5"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.actorStream$default$4"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.actorStream$default$3"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.actorStream"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.api.java.JavaStreamingContext.actorStream"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.streaming.zeromq.ZeroMQReceiver"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.streaming.receiver.ActorReceiver$Supervisor")
      ) ++ Seq(
        // SPARK-12348 Remove deprecated Streaming APIs.
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.streaming.dstream.DStream.foreach"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions$default$4"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.awaitTermination"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.networkStream"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.streaming.api.java.JavaStreamingContextFactory"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.api.java.JavaStreamingContext.awaitTermination"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.api.java.JavaStreamingContext.sc"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.api.java.JavaDStreamLike.reduceByWindow"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.api.java.JavaDStreamLike.foreachRDD"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.api.java.JavaDStreamLike.foreach"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.streaming.api.java.JavaStreamingContext.getOrCreate")
      ) ++ Seq(
        // SPARK-12847 Remove StreamingListenerBus and post all Streaming events to the same thread as Spark events
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.AsynchronousListenerBus$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.AsynchronousListenerBus")
      ) ++ Seq(
        // SPARK-11622 Make LibSVMRelation extends HadoopFsRelation and Add LibSVMOutputWriter
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ml.source.libsvm.DefaultSource"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.source.libsvm.DefaultSource.createRelation")
      ) ++ Seq(
        // SPARK-6363 Make Scala 2.11 the default Scala version
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.cleanup"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.metadataCleaner"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.scheduler.cluster.YarnSchedulerBackend$YarnDriverEndpoint"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.scheduler.cluster.YarnSchedulerBackend$YarnSchedulerEndpoint")
      ) ++ Seq(
        // SPARK-7889
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.deploy.history.HistoryServer.org$apache$spark$deploy$history$HistoryServer$@tachSparkUI"),
        // SPARK-13296
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.UDFRegistration.register"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UserDefinedPythonFunction$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UserDefinedPythonFunction"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UserDefinedFunction"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UserDefinedFunction$")
      ) ++ Seq(
        // SPARK-12995 Remove deprecated APIs in graphx
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.lib.SVDPlusPlus.runSVDPlusPlus"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.Graph.mapReduceTriplets"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.Graph.mapReduceTriplets$default$3"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.impl.GraphImpl.mapReduceTriplets")
      ) ++ Seq(
        // SPARK-13426 Remove the support of SIMR
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkMasterRegex.SIMR_REGEX")
      ) ++ Seq(
        // SPARK-13413 Remove SparkContext.metricsSystem/schedulerBackend_ setter
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.metricsSystem"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.schedulerBackend_=")
      ) ++ Seq(
        // SPARK-13220 Deprecate yarn-client and yarn-cluster mode
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.SparkContext.org$apache$spark$SparkContext$$createTaskScheduler")
      ) ++ Seq(
        // SPARK-13465 Add a task failure listener to TaskContext
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.TaskContext.addTaskFailureListener")
      ) ++ Seq(
        // SPARK-7729 Executor which has been killed should also be displayed on Executor Tab
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.status.api.v1.ExecutorSummary.this")
      ) ++ Seq(
        // SPARK-13526 Move SQLContext per-session states to new class
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.sql.UDFRegistration.this")
      ) ++ Seq(
        // [SPARK-13486][SQL] Move SQLConf into an internal package
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLConf"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLConf$SQLConfEntry"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLConf$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLConf$SQLConfEntry$")
      ) ++ Seq(
        // SPARK-11011 UserDefinedType serialization should be strongly typed
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.linalg.VectorUDT.serialize"),
        // SPARK-12073: backpressure rate controller consumes events preferentially from lagging partitions
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.kafka.KafkaTestUtils.createTopic"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.kafka.DirectKafkaInputDStream.maxMessagesPerPartition")
      ) ++ Seq(
        // [SPARK-13244][SQL] Migrates DataFrame to Dataset
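        // (Background: in 2.0, DataFrame becomes a type alias for Dataset[Row], so the old
        // org.apache.spark.sql.DataFrame class disappears from bytecode and DataFrame-returning
        // APIs change their result type; the excludes below follow from that.)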
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.tables"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.sql"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.baseRelationToDataFrame"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.table"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrame.apply"),

        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.DataFrame"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.DataFrame$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.LegacyFunctions"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.DataFrameHolder"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.DataFrameHolder$"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLImplicits.localSeqToDataFrameHolder"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLImplicits.stringRddToDataFrameHolder"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLImplicits.rddToDataFrameHolder"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLImplicits.longRddToDataFrameHolder"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLImplicits.intRddToDataFrameHolder"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.GroupedDataset"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.Dataset.subtract"),

        // [SPARK-14451][SQL] Move encoder definition into Aggregator interface
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.Aggregator.toColumn"),
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.expressions.Aggregator.bufferEncoder"),
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.expressions.Aggregator.outputEncoder"),

        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.evaluation.MultilabelMetrics.this"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions")
      ) ++ Seq(
        // [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `regParam` to (Streaming)LinearRegressionWithSGD
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.this")
      ) ++ Seq(
        // SPARK-15250 Remove deprecated json API in DataFrameReader
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.DataFrameReader.json")
      ) ++ Seq(
        // SPARK-13920: MIMA checks should apply to @Experimental and @DeveloperAPI APIs
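        // (Everything in this block is an @Experimental or @DeveloperApi API that earlier MiMa
        // runs skipped; auditing them for 2.0 surfaced these already-shipped changes in one batch.)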
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.Aggregator.combineCombinersByKey"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.Aggregator.combineValuesByKey"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ComplexFutureAction.run"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ComplexFutureAction.runJob"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ComplexFutureAction.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.actorSystem"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.cacheManager"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.getConfigurationFromJobContext"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.getTaskAttemptIDFromTaskAttemptContext"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.newConfiguration"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.bytesReadCallback"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.bytesReadCallback_="),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.canEqual"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.copy"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.productArity"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.productElement"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.productIterator"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.productPrefix"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.setBytesReadCallback"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.updateBytesRead"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.OutputMetrics.canEqual"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.OutputMetrics.copy"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.OutputMetrics.productArity"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.OutputMetrics.productElement"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.OutputMetrics.productIterator"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.OutputMetrics.productPrefix"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleReadMetrics.decFetchWaitTime"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleReadMetrics.decLocalBlocksFetched"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleReadMetrics.decRecordsRead"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleReadMetrics.decRemoteBlocksFetched"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleReadMetrics.decRemoteBytesRead"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.decShuffleBytesWritten"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.decShuffleRecordsWritten"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.decShuffleWriteTime"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.incShuffleBytesWritten"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.incShuffleRecordsWritten"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.incShuffleWriteTime"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.ShuffleWriteMetrics.setShuffleRecordsWritten"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.feature.PCAModel.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.rdd.RDD.mapPartitionsWithContext"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.AccumulableInfo.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate.taskMetrics"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.TaskInfo.attempt"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.ExperimentalMethods.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.callUDF"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.callUdf"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.cumeDist"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.denseRank"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.inputFileName"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.isNaN"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.percentRank"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.rowNumber"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.functions.sparkPartitionId"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.storage.BlockStatus.apply"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.storage.BlockStatus.copy"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.storage.BlockStatus.externalBlockStoreSize"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.storage.BlockStatus.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.storage.StorageStatus.offHeapUsed"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.storage.StorageStatus.offHeapUsedByRdd"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.storage.StorageStatusListener.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.streaming.scheduler.BatchInfo.streamIdToNumRecords"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ui.exec.ExecutorsListener.storageStatusList"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ui.exec.ExecutorsListener.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ui.storage.StorageListener.storageStatusList"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ExceptionFailure.apply"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ExceptionFailure.copy"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ExceptionFailure.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.executor.InputMetrics.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.executor.OutputMetrics.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Estimator.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Pipeline.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.PipelineModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.PredictionModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.PredictionModel.transformImpl"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Predictor.extractLabeledPoints"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Predictor.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Predictor.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Transformer.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionTrainingSummary.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.ClassificationModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.GBTClassifier.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.MultilayerPerceptronClassifier.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.NaiveBayes.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.OneVsRest.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.OneVsRestModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.RandomForestClassifier.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.clustering.KMeans.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.clustering.KMeansModel.computeCost"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.clustering.KMeansModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.clustering.LDAModel.logLikelihood"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.clustering.LDAModel.logPerplexity"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.clustering.LDAModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.evaluation.BinaryClassificationEvaluator.evaluate"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.evaluation.Evaluator.evaluate"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator.evaluate"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.evaluation.RegressionEvaluator.evaluate"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.Binarizer.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.Bucketizer.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.ChiSqSelector.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.ChiSqSelectorModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.CountVectorizer.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.CountVectorizerModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.HashingTF.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.IDF.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.IDFModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.IndexToString.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.Interaction.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.MinMaxScaler.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.MinMaxScalerModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.OneHotEncoder.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.PCA.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.PCAModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.QuantileDiscretizer.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.RFormula.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.RFormulaModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.SQLTransformer.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.StandardScaler.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.StandardScalerModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.StopWordsRemover.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.StringIndexer.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.StringIndexerModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.VectorAssembler.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.VectorIndexer.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.VectorIndexerModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.VectorSlicer.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.Word2Vec.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.Word2VecModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.recommendation.ALS.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.recommendation.ALSModel.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.recommendation.ALSModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.AFTSurvivalRegression.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.AFTSurvivalRegressionModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.GBTRegressor.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.IsotonicRegression.extractWeightedLabeledPoints"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.IsotonicRegression.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.IsotonicRegressionModel.extractWeightedLabeledPoints"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.IsotonicRegressionModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.LinearRegression.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.LinearRegressionSummary.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.LinearRegressionTrainingSummary.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressor.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.tuning.CrossValidator.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.tuning.CrossValidatorModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.tuning.TrainValidationSplit.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.tuning.TrainValidationSplitModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.evaluation.BinaryClassificationMetrics.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.evaluation.MulticlassMetrics.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.evaluation.RegressionMetrics.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.DataFrameNaFunctions.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.DataFrameStatFunctions.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.DataFrameWriter.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.functions.broadcast"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.functions.callUDF"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.sources.CreatableRelationProvider.createRelation"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.sources.InsertableRelation.insert"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.fMeasureByThreshold"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.pr"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.precisionByThreshold"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.predictions"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.recallByThreshold"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.roc"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.clustering.LDAModel.describeTopics"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.feature.Word2VecModel.findSynonyms"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.feature.Word2VecModel.getVectors"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.recommendation.ALSModel.itemFactors"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.recommendation.ALSModel.userFactors"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.LinearRegressionSummary.predictions"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.LinearRegressionSummary.residuals"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.scheduler.AccumulableInfo.name"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.scheduler.AccumulableInfo.value"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameNaFunctions.drop"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameNaFunctions.fill"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameNaFunctions.replace"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameReader.jdbc"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameReader.json"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameReader.load"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameReader.orc"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameReader.parquet"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameReader.table"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameReader.text"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameStatFunctions.crosstab"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameStatFunctions.freqItems"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.DataFrameStatFunctions.sampleBy"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.createExternalTable"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.emptyDataFrame"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.range"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.functions.udf"),
|
|
|
|
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.scheduler.JobLogger"),
|
|
|
|
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.streaming.receiver.ActorHelper"),
|
|
|
|
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.streaming.receiver.ActorSupervisorStrategy"),
|
|
|
|
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.streaming.receiver.ActorSupervisorStrategy$"),
|
|
|
|
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.streaming.receiver.Statistics"),
|
|
|
|
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.streaming.receiver.Statistics$"),
|
|
|
|
ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.executor.InputMetrics"),
|
|
|
|
ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.executor.InputMetrics$"),
|
|
|
|
ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.executor.OutputMetrics"),
|
|
|
|
ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.executor.OutputMetrics$"),
|
|
|
|
ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.functions$"),
|
|
|
|
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.Estimator.fit"),
|
|
|
|
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.Predictor.train"),
|
|
|
|
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.Transformer.transform"),
|
|
|
|
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.evaluation.Evaluator.evaluate"),
|
|
|
|
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.scheduler.SparkListener.onOtherEvent"),
|
|
|
|
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.CreatableRelationProvider.createRelation"),
|
|
|
|
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.InsertableRelation.insert")
|
2016-03-17 01:52:55 -04:00
|
|
|
      ) ++ Seq(
        // [SPARK-13926] Automatically use Kryo serializer when shuffling RDDs with simple types
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ShuffleDependency.this"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ShuffleDependency.serializer"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.serializer.Serializer$")
      ) ++ Seq(
        // SPARK-13927: add row/column iterator to local matrices
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix.rowIter"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix.colIter")
      ) ++ Seq(
        // SPARK-13948: MiMa check should catch if the visibility changes to `private`
        // TODO(josh): Some of these may be legitimate incompatibilities; we should follow up before the 2.0.0 release
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.Dataset.toDS"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.sources.OutputWriterFactory.newInstance"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.util.RpcUtils.askTimeout"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.util.RpcUtils.lookupTimeout"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.UnaryTransformer.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.DecisionTreeClassifier.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.LogisticRegression.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.regression.DecisionTreeRegressor.train"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.Dataset.groupBy"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.Dataset.groupBy"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.Dataset.select"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.Dataset.toDF"),
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.Logging.initializeLogIfNecessary"),
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerEvent.logEvent"),
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.OutputWriterFactory.newInstance")
      ) ++ Seq(
        // [SPARK-14014] Replace existing analysis.Catalog with SessionCatalog
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.this")
      ) ++ Seq(
        // [SPARK-13928] Move org.apache.spark.Logging into org.apache.spark.internal.Logging
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.Logging"),
        (problem: Problem) => problem match {
          case MissingTypesProblem(_, missing)
            if missing.map(_.fullName).sameElements(Seq("org.apache.spark.Logging")) => false
          case _ => true
        }
      ) ++ Seq(
        // [SPARK-13990] Automatically pick serializer when caching RDDs
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.network.netty.NettyBlockTransferService.uploadBlock")
      ) ++ Seq(
        // [SPARK-14089][CORE][MLLIB] Remove methods that have been deprecated since 1.1, 1.2, 1.3, 1.4, and 1.5
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.getThreadLocal"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.rdd.RDDFunctions.treeReduce"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.rdd.RDDFunctions.treeAggregate"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.tree.configuration.Strategy.defaultStategy"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.util.MLUtils.loadLibSVMFile"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.MLUtils.loadLibSVMFile"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.MLUtils.saveLabeledData"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.MLUtils.loadLabeledData"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.optimization.LBFGS.setMaxNumIterations"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.evaluation.BinaryClassificationEvaluator.setScoreCol")
      ) ++ Seq(
        // [SPARK-14205][SQL] remove trait Queryable
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.Dataset")
      ) ++ Seq(
        // [SPARK-11262][ML] Unit test for gradient, loss layers, memory management
        // for multilayer perceptron.
        // This class is marked as `private`.
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.ml.ann.SoftmaxFunction")
      ) ++ Seq(
        // [SPARK-13674][SQL] Add wholestage codegen support to Sample
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.util.random.PoissonSampler.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.util.random.PoissonSampler.this")
      ) ++ Seq(
        // [SPARK-13430][ML] moved featureCol from LinearRegressionModelSummary to LinearRegressionSummary
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.regression.LinearRegressionSummary.this")
      ) ++ Seq(
        // [SPARK-14437][Core] Use the address that NettyBlockTransferService listens to create BlockManagerId
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.network.netty.NettyBlockTransferService.this")
      ) ++ Seq(
        // [SPARK-13048][ML][MLLIB] keepLastCheckpoint option for LDA EM optimizer
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.DistributedLDAModel.this")
      ) ++ Seq(
        // [SPARK-14475] Propagate user-defined context from driver to executors
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.getLocalProperty"),
        // [SPARK-14617] Remove deprecated APIs in TaskMetrics
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.executor.InputMetrics$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.executor.OutputMetrics$"),
        // [SPARK-14628] Simplify task metrics by always tracking read/write metrics
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.readMethod"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.OutputMetrics.writeMethod")
      ) ++ Seq(
        // SPARK-14628: Always track input/output/shuffle metrics
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.ShuffleReadMetrics.totalBlocksFetched"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.status.api.v1.ShuffleReadMetrics.this"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.TaskMetrics.inputMetrics"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.TaskMetrics.outputMetrics"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.TaskMetrics.shuffleWriteMetrics"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.TaskMetrics.shuffleReadMetrics"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.status.api.v1.TaskMetrics.this"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.TaskMetricDistributions.inputMetrics"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.TaskMetricDistributions.outputMetrics"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.TaskMetricDistributions.shuffleWriteMetrics"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.TaskMetricDistributions.shuffleReadMetrics"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.status.api.v1.TaskMetricDistributions.this")
      ) ++ Seq(
        // SPARK-13643: Move functionality from SQLContext to SparkSession
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.getSchema")
      ) ++ Seq(
        // [SPARK-14407] Hides HadoopFsRelation related data source API into execution package
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.OutputWriter"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.OutputWriterFactory")
      ) ++ Seq(
        // SPARK-14734: Add conversions between mllib and ml Vector, Matrix types
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.asML"),
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix.asML")
      ) ++ Seq(
        // SPARK-14704: Create accumulators in TaskMetrics
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.InputMetrics.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.OutputMetrics.this")
      ) ++ Seq(
        // SPARK-14861: Replace internal usages of SQLContext with SparkSession
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.ml.clustering.LocalLDAModel.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.ml.clustering.DistributedLDAModel.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.ml.clustering.LDAModel.this"),
        ProblemFilters.exclude[DirectMissingMethodProblem](
          "org.apache.spark.ml.clustering.LDAModel.sqlContext"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.sql.Dataset.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.sql.DataFrameReader.this")
      ) ++ Seq(
        // SPARK-14542 configurable buffer size for pipe RDD
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.rdd.RDD.pipe"),
        ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.pipe")
      ) ++ Seq(
        // [SPARK-4452][Core] Shuffle data structures can starve others on the same thread for memory
        ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.util.collection.Spillable")
      ) ++ Seq(
        // [SPARK-14952][Core][ML] Remove methods deprecated in 1.6
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.input.PortableDataStream.close"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionModel.weights"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.LinearRegressionModel.weights")
      ) ++ Seq(
        // [SPARK-10653] [Core] Remove unnecessary things from SparkEnv
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.sparkFilesDir"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.blockTransferService")
      ) ++ Seq(
        // SPARK-14654: New accumulator API
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ExceptionFailure$"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ExceptionFailure.apply"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ExceptionFailure.metrics"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ExceptionFailure.copy"),
        ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ExceptionFailure.this"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.executor.ShuffleReadMetrics.remoteBlocksFetched"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.executor.ShuffleReadMetrics.totalBlocksFetched"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.executor.ShuffleReadMetrics.localBlocksFetched"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.ShuffleReadMetrics.remoteBlocksFetched"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.ShuffleReadMetrics.localBlocksFetched")
      ) ++ Seq(
        // [SPARK-15290] Move annotations, like @Since / @DeveloperApi, into spark-tags
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.package$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.package"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.Private"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.AlphaComponent"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.Experimental"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.DeveloperApi")
      )

case v if v.startsWith("1.6") =>
|
|
|
|
Seq(
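        // A brief note on the two helper styles used below (assuming the helper
        // definitions in MimaBuild.scala): MimaBuild.excludeSparkPackage("x") is
        // shorthand for excludePackage("org.apache.spark.x"), while
        // MimaBuild.excludeSparkClass returns a whole Seq of filters covering the
        // named class (and its companion), which is why it is appended to the
        // surrounding sequence with `++` rather than listed inside Seq(...).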
        MimaBuild.excludeSparkPackage("deploy"),
        MimaBuild.excludeSparkPackage("network"),
        MimaBuild.excludeSparkPackage("unsafe"),
        // These are needed if checking against the sbt build, since they are part of
        // the maven-generated artifacts in 1.3.
        excludePackage("org.spark-project.jetty"),
        MimaBuild.excludeSparkPackage("unused"),
        // SQL execution is considered private.
        excludePackage("org.apache.spark.sql.execution"),
        // SQL columnar is considered private.
        excludePackage("org.apache.spark.sql.columnar"),
        // The shuffle package is considered private.
        excludePackage("org.apache.spark.shuffle"),
        // The collections utilities are considered private.
        excludePackage("org.apache.spark.util.collection")
      ) ++
      MimaBuild.excludeSparkClass("streaming.flume.FlumeTestUtils") ++
      MimaBuild.excludeSparkClass("streaming.flume.PollingFlumeTestUtils") ++
      Seq(
        // MiMa does not deal properly with sealed traits
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.ml.classification.LogisticRegressionSummary.featuresCol")
      ) ++ Seq(
        // SPARK-11530
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.feature.PCAModel.this")
      ) ++ Seq(
        // SPARK-10381 Fix types / units in private AskPermissionToCommitOutput RPC message.
        // This class is marked as `private` but MiMa still seems to be confused by the change.
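        // (The `copy$default$N` names below are the compiler-generated methods that
        // supply default arguments to the case class's copy(); changing a field's
        // type changes their signatures too, hence the separate excludes.)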
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.task"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.copy$default$2"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.copy"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.taskAttempt"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.copy$default$3"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.apply")
      ) ++ Seq(
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.shuffle.FileShuffleBlockResolver$ShuffleFileGroup")
      ) ++ Seq(
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.ml.regression.LeastSquaresAggregator.add"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.ml.regression.LeastSquaresCostFun.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.clearLastInstantiatedContext"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.setLastInstantiatedContext"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.SQLContext$SQLSession"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.detachSession"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.tlSession"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.defaultSession"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.currentSession"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.openSession"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.setSession"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.createSession")
      ) ++ Seq(
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.SparkContext.preferredNodeLocationData_="),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.rdd.MapPartitionsWithPreparationRDD"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.rdd.MapPartitionsWithPreparationRDD$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SparkSQLParser")
      ) ++ Seq(
        // SPARK-11485
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.DataFrameHolder.df"),
        // SPARK-11541 mark various JDBC dialects as private
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.NoopDialect.productElement"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.NoopDialect.productArity"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.NoopDialect.canEqual"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.NoopDialect.productIterator"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.NoopDialect.productPrefix"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.NoopDialect.toString"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.NoopDialect.hashCode"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.jdbc.PostgresDialect$"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.PostgresDialect.productElement"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.PostgresDialect.productArity"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.PostgresDialect.canEqual"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.PostgresDialect.productIterator"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.PostgresDialect.productPrefix"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.PostgresDialect.toString"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.jdbc.PostgresDialect.hashCode"),
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.jdbc.NoopDialect$")
      ) ++ Seq(
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.status.api.v1.ApplicationInfo.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.status.api.v1.StageData.this")
      ) ++ Seq(
        // SPARK-11766 add toJson to Vector
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Vector.toJson")
      ) ++ Seq(
        // SPARK-9065 Support message handler in Kafka Python API
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper.createDirectStream"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper.createRDD")
      ) ++ Seq(
        // SPARK-4557 Changed foreachRDD to use VoidFunction
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.api.java.JavaDStreamLike.foreachRDD")
      ) ++ Seq(
        // SPARK-11996 Make the executor thread dump work again
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.executor.ExecutorEndpoint"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.executor.ExecutorEndpoint$"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.storage.BlockManagerMessages$GetRpcHostPortForExecutor"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.storage.BlockManagerMessages$GetRpcHostPortForExecutor$")
      ) ++ Seq(
        // SPARK-3580 Add getNumPartitions method to JavaRDD
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.getNumPartitions")
      ) ++ Seq(
        // SPARK-12149 Added new fields to ExecutorSummary
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.status.api.v1.ExecutorSummary.this")
      ) ++
      // SPARK-11314: YARN backend moved to yarn sub-module and MiMa complains even though it's a
      // private class.
      MimaBuild.excludeSparkClass("scheduler.cluster.YarnSchedulerBackend$YarnSchedulerEndpoint")

    case v if v.startsWith("1.5") =>
      Seq(
        MimaBuild.excludeSparkPackage("network"),
        MimaBuild.excludeSparkPackage("deploy"),
        // These are needed if checking against the sbt build, since they are part of
        // the maven-generated artifacts in 1.3.
        excludePackage("org.spark-project.jetty"),
        MimaBuild.excludeSparkPackage("unused"),
        // JavaRDDLike is not meant to be extended by user programs
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.partitioner"),
        // Modification of private static method
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.streaming.kafka.KafkaUtils.org$apache$spark$streaming$kafka$KafkaUtils$$leadersForRanges"),
        // Mima false positive (was a private[spark] class)
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.util.collection.PairIterator"),
        // Removing a testing method from a private class
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.kafka.KafkaTestUtils.waitUntilLeaderOffset"),
        // While private, MiMa is still not happy about the changes:
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.ml.regression.LeastSquaresAggregator.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.ml.regression.LeastSquaresCostFun.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.ml.classification.LogisticCostFun.this"),
        // SQL execution is considered private.
        excludePackage("org.apache.spark.sql.execution"),
        // The old JSON RDD is removed in favor of streaming Jackson
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.json.JsonRDD$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.json.JsonRDD"),
        // local function inside a method
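        // (Names like `...$$needsConversion$1` below are the compiler-mangled
        // forms of local/private members; they still exist in the bytecode,
        // which is why MiMa can see them at all.)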
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.org$apache$spark$sql$SQLContext$$needsConversion$1"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.UDFRegistration.org$apache$spark$sql$UDFRegistration$$builder$24")
      ) ++ Seq(
        // SPARK-8479 Add numNonzeros and numActives to Matrix.
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Matrix.numNonzeros"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Matrix.numActives")
      ) ++ Seq(
        // SPARK-8914 Remove RDDApi
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.RDDApi")
      ) ++ Seq(
        // SPARK-7292 Provide operator to truncate lineage cheaply
        ProblemFilters.exclude[AbstractClassProblem](
          "org.apache.spark.rdd.RDDCheckpointData"),
        ProblemFilters.exclude[AbstractClassProblem](
          "org.apache.spark.rdd.CheckpointRDD")
      ) ++ Seq(
        // SPARK-8701 Add input metadata in the batch page.
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.streaming.scheduler.InputInfo$"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.streaming.scheduler.InputInfo")
      ) ++ Seq(
        // SPARK-6797 Support YARN modes for SparkR
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.r.PairwiseRRDD.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.r.RRDD.createRWorker"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.r.RRDD.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.r.StringRRDD.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.r.BaseRRDD.this")
      ) ++ Seq(
        // SPARK-7422 add argmax for sparse vectors
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Vector.argmax")
      ) ++ Seq(
        // SPARK-8906 Move all internal data source classes into execution.datasources
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.ResolvedDataSource"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PreInsertCastAndRename$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTableUsingAsSelect$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.InsertIntoDataSource$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitioningUtils$PartitionValues$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DefaultWriterContainer"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitioningUtils$PartitionValues"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.RefreshTable$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTempTableUsing$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitionSpec"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DynamicPartitionWriterContainer"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTableUsingAsSelect"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DescribeCommand$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitioningUtils$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PreInsertCastAndRename"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Partition$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LogicalRelation$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitioningUtils"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LogicalRelation"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Partition"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.BaseWriterContainer"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PreWriteCheck"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTableUsing"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.RefreshTable"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DataSourceStrategy$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTempTableUsing"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTempTableUsingAsSelect$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTempTableUsingAsSelect"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CreateTableUsing$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.ResolvedDataSource$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PreWriteCheck$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.InsertIntoDataSource"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.InsertIntoHadoopFsRelation"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DDLParser"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.CaseInsensitiveMap"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DataSourceStrategy"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitionSpec$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DescribeCommand"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DDLException"),
        // SPARK-9763 Minimize exposure of internal SQL classes
        excludePackage("org.apache.spark.sql.parquet"),
        excludePackage("org.apache.spark.sql.json"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCRDD$DecimalConversion$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCPartition"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JdbcUtils$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCRDD$DecimalConversion"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCPartitioningInfo$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCPartition$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.package"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCRDD$JDBCConversion"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCRDD$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.package$DriverWrapper"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCRDD"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCPartitioningInfo"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JdbcUtils"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.DefaultSource"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCRelation$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.package$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.JDBCRelation")
      ) ++ Seq(
        // SPARK-4751 Dynamic allocation for standalone mode
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.SparkContext.supportDynamicAllocation")
      ) ++ Seq(
        // SPARK-9580: Remove SQL test singletons
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.test.LocalSQLContext$SQLSession"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.test.LocalSQLContext"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.test.TestSQLContext"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.test.TestSQLContext$")
      ) ++ Seq(
        // SPARK-9704 Made ProbabilisticClassifier, Identifiable, VectorUDT public APIs
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.mllib.linalg.VectorUDT.serialize")
      ) ++ Seq(
        // SPARK-10381 Fix types / units in private AskPermissionToCommitOutput RPC message.
        // This class is marked as `private` but MiMa still seems to be confused by the change.
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.task"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.copy$default$2"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.copy"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.taskAttempt"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.copy$default$3"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.scheduler.AskPermissionToCommitOutput.apply")
      )

    case v if v.startsWith("1.4") =>
      Seq(
        MimaBuild.excludeSparkPackage("deploy"),
        MimaBuild.excludeSparkPackage("ml"),
        // SPARK-7910 Adding a method to get the partitioner to JavaRDD
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitioner"),
        // SPARK-5922 Adding a generalized diff(other: RDD[(VertexId, VD)]) to VertexRDD
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.VertexRDD.diff"),
        // These are needed if checking against the sbt build, since they are part of
        // the maven-generated artifacts in 1.3.
        excludePackage("org.spark-project.jetty"),
        MimaBuild.excludeSparkPackage("unused"),
        ProblemFilters.exclude[MissingClassProblem]("com.google.common.base.Optional"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.rdd.JdbcRDD.compute"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.broadcast.HttpBroadcastFactory.newBroadcast"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint")
      ) ++ Seq(
        // SPARK-4655 - Making Stage an abstract class broke binary compatibility even though
        // the Stage class is defined as private[spark]
        ProblemFilters.exclude[AbstractClassProblem]("org.apache.spark.scheduler.Stage")
      ) ++ Seq(
        // SPARK-6510 Add a Graph#minus method acting as Set#difference
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.VertexRDD.minus")
      ) ++ Seq(
        // SPARK-6492 Fix deadlock in SparkContext.stop()
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.SparkContext.org$" +
          "apache$spark$SparkContext$$SPARK_CONTEXT_CONSTRUCTOR_LOCK")
      ) ++ Seq(
        // SPARK-6693 add toString with max lines and width for matrix
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Matrix.toString")
      ) ++ Seq(
        // SPARK-6703 Add getOrCreate method to SparkContext
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.SparkContext.org$apache$spark$SparkContext$$activeContext")
      ) ++ Seq(
        // SPARK-7090 Introduce LDAOptimizer to LDA to further improve extensibility
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.mllib.clustering.LDA$EMOptimizer")
      ) ++ Seq(
        // SPARK-6756 add toSparse, toDense, numActives, numNonzeros, and compressed to Vector
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Vector.compressed"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Vector.toDense"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Vector.numNonzeros"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Vector.toSparse"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Vector.numActives"),
        // SPARK-7681 add SparseVector support for gemv
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.Matrix.multiply"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.DenseMatrix.multiply"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.linalg.SparseMatrix.multiply")
      ) ++ Seq(
        // Execution should never be included, as it's always internal.
        MimaBuild.excludeSparkPackage("sql.execution"),
        // This `protected[sql]` method was removed in 1.3.1
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.sql.SQLContext.checkAnalysis"),
        // These `private[sql]` classes were removed in 1.4.0:
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.execution.AddExchange"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.execution.AddExchange$"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.PartitionSpec"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.PartitionSpec$"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.Partition"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.Partition$"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues$"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.ParquetRelation2"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.ParquetRelation2$"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache"),
        // These test support classes were moved out of src/main and into src/test:
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.ParquetTestData"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.ParquetTestData$"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.TestGroupWriteSupport"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CachedData"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CachedData$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CacheManager"),
        // TODO: Remove the following rule once ParquetTest has been moved to src/test.
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.sql.parquet.ParquetTest")
      ) ++ Seq(
        // SPARK-7530 Added StreamingContext.getState()
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.StreamingContext.state_=")
      ) ++ Seq(
        // SPARK-7081 changed ShuffleWriter from a trait to an abstract class and removed some
        // unnecessary type bounds in order to fix some compiler warnings that occurred when
        // implementing this interface in Java. Note that ShuffleWriter is private[spark].
        ProblemFilters.exclude[IncompatibleTemplateDefProblem](
          "org.apache.spark.shuffle.ShuffleWriter")
      ) ++ Seq(
        // SPARK-6888 make JDBC driver handling user-definable
        // This patch renames some classes to API-friendly names.
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.DriverQuirks$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.DriverQuirks"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.PostgresQuirks"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.NoQuirks"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.MySQLQuirks")
      )

case v if v.startsWith("1.3") =>
|
|
|
|
Seq(
|
|
|
|
MimaBuild.excludeSparkPackage("deploy"),
|
|
|
|
MimaBuild.excludeSparkPackage("ml"),
|
|
|
|
// These are needed if checking against the sbt build, since they are part of
|
|
|
|
// the maven-generated artifacts in the 1.2 build.
|
|
|
|
MimaBuild.excludeSparkPackage("unused"),
|
|
|
|
ProblemFilters.exclude[MissingClassProblem]("com.google.common.base.Optional")
|
|
|
|
) ++ Seq(
|
|
|
|
// SPARK-2321
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.SparkStageInfoImpl.this"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.SparkStageInfo.submissionTime")
|
|
|
|
) ++ Seq(
|
|
|
|
// SPARK-4614
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.linalg.Matrices.randn"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.linalg.Matrices.rand")
|
|
|
|
) ++ Seq(
|
|
|
|
// SPARK-5321
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.linalg.SparseMatrix.transposeMultiply"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.linalg.Matrix.transpose"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.linalg.DenseMatrix.transposeMultiply"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix." +
|
|
|
|
"org$apache$spark$mllib$linalg$Matrix$_setter_$isTransposed_="),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.linalg.Matrix.isTransposed"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.linalg.Matrix.foreachActive")
|
|
|
|
) ++ Seq(
|
|
|
|
// SPARK-5540
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.recommendation.ALS.solveLeastSquares"),
|
|
|
|
// SPARK-5536
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$^dateFeatures"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$^dateBlock")
|
|
|
|
) ++ Seq(
        // SPARK-3325
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.api.java.JavaDStreamLike.print"),
        // SPARK-2757
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.streaming.flume.sink.SparkAvroCallbackHandler." +
            "removeAndGetProcessor")
      ) ++ Seq(
        // SPARK-5123 (SparkSQL data type change) - alpha component only
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.ml.feature.HashingTF.outputDataType"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.ml.feature.Tokenizer.outputDataType"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.ml.feature.Tokenizer.validateInputType"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.ml.classification.LogisticRegressionModel.validateAndTransformSchema"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.ml.classification.LogisticRegression.validateAndTransformSchema")
      ) ++ Seq(
        // SPARK-4014
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.TaskContext.taskAttemptId"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.TaskContext.attemptNumber")
      ) ++ Seq(
        // SPARK-5166 Spark SQL API stabilization
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Transformer.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Estimator.fit"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.Transformer.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Pipeline.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.PipelineModel.transform"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.Estimator.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.Evaluator.evaluate"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.Evaluator.evaluate"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.tuning.CrossValidator.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.tuning.CrossValidatorModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.StandardScaler.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.StandardScalerModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.LogisticRegressionModel.transform"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.classification.LogisticRegression.fit"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.evaluation.BinaryClassificationEvaluator.evaluate")
      ) ++ Seq(
        // SPARK-5270
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.isEmpty")
      ) ++ Seq(
        // SPARK-5430
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.treeReduce"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.treeAggregate")
      ) ++ Seq(
        // SPARK-5297 Java FileStream do not work with custom key/values
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.api.java.JavaStreamingContext.fileStream")
      ) ++ Seq(
        // SPARK-5315 Spark Streaming Java API returns Scala DStream
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.api.java.JavaDStreamLike.reduceByWindow")
      ) ++ Seq(
        // SPARK-5461 Graph should have isCheckpointed, getCheckpointFiles methods
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.graphx.Graph.getCheckpointFiles"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.graphx.Graph.isCheckpointed")
      ) ++ Seq(
        // SPARK-4789 Standardize ML Prediction APIs
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.mllib.linalg.VectorUDT"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.linalg.VectorUDT.serialize"),
        ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.linalg.VectorUDT.sqlType")
      ) ++ Seq(
        // SPARK-5814
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$wrapDoubleArray"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$fillFullMatrix"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$iterations"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$makeOutLinkBlock"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$computeYtY"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$makeLinkRDDs"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$alpha"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$randomFactor"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$makeInLinkBlock"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$dspr"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$lambda"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$implicitPrefs"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$rank")
      ) ++ Seq(
        // SPARK-4682
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.RealClock"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.Clock"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.TestClock")
      ) ++ Seq(
        // SPARK-5922 Adding a generalized diff(other: RDD[(VertexId, VD)]) to VertexRDD
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.VertexRDD.diff")
      )
    case v if v.startsWith("1.2") =>
      Seq(
        MimaBuild.excludeSparkPackage("deploy"),
        MimaBuild.excludeSparkPackage("graphx")
      ) ++
      MimaBuild.excludeSparkClass("mllib.linalg.Matrix") ++
      MimaBuild.excludeSparkClass("mllib.linalg.Vector") ++
      Seq(
        ProblemFilters.exclude[IncompatibleTemplateDefProblem](
          "org.apache.spark.scheduler.TaskLocation"),
        // Added normL1 and normL2 to trait MultivariateStatisticalSummary
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.stat.MultivariateStatisticalSummary.normL1"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.stat.MultivariateStatisticalSummary.normL2"),
        // MapStatus should be private[spark]
        ProblemFilters.exclude[IncompatibleTemplateDefProblem](
          "org.apache.spark.scheduler.MapStatus"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.network.netty.PathResolver"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.spark.network.netty.client.BlockClientListener"),
        // TaskContext was promoted to Abstract class
        ProblemFilters.exclude[AbstractClassProblem](
          "org.apache.spark.TaskContext"),
        ProblemFilters.exclude[IncompatibleTemplateDefProblem](
          "org.apache.spark.util.collection.SortDataFormat")
      ) ++ Seq(
        // Adding new methods to the JavaRDDLike trait:
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.takeAsync"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.foreachPartitionAsync"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.countAsync"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.foreachAsync"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.api.java.JavaRDDLike.collectAsync")
      ) ++ Seq(
        // SPARK-3822
        ProblemFilters.exclude[IncompatibleResultTypeProblem](
          "org.apache.spark.SparkContext.org$apache$spark$SparkContext$$createTaskScheduler")
      ) ++ Seq(
        // SPARK-1209
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.hadoop.mapreduce.SparkHadoopMapReduceUtil"),
        ProblemFilters.exclude[MissingClassProblem](
          "org.apache.hadoop.mapred.SparkHadoopMapRedUtil"),
        ProblemFilters.exclude[MissingTypesProblem](
          "org.apache.spark.rdd.PairRDDFunctions")
      ) ++ Seq(
        // SPARK-4062
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.kafka.KafkaReceiver#MessageHandler.this")
      )
case v if v.startsWith("1.1") =>
|
|
|
|
Seq(
|
|
|
|
MimaBuild.excludeSparkPackage("deploy"),
|
|
|
|
MimaBuild.excludeSparkPackage("graphx")
|
|
|
|
) ++
|
|
|
|
Seq(
|
|
|
|
// Adding new method to JavaRDLike trait - we should probably mark this as a developer API.
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitions"),
|
|
|
|
// Should probably mark this as Experimental
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.api.java.JavaRDDLike.foreachAsync"),
|
|
|
|
// We made a mistake earlier (ed06500d3) in the Java API to use default parameter values
|
|
|
|
// for countApproxDistinct* functions, which does not work in Java. We later removed
|
|
|
|
// them, and use the following to tell Mima to not care about them.
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem](
|
|
|
|
"org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
|
|
|
|
ProblemFilters.exclude[IncompatibleResultTypeProblem](
|
|
|
|
"org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.api.java.JavaPairRDD.countApproxDistinct$default$1"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey$default$1"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.api.java.JavaRDD.countApproxDistinct$default$1"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.api.java.JavaRDDLike.countApproxDistinct$default$1"),
|
|
|
|
ProblemFilters.exclude[MissingMethodProblem](
|
|
|
|
"org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.storage.DiskStore.getValues"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.storage.MemoryStore.Entry")
      ) ++
      Seq(
        // Serializer interface change. See SPARK-3045.
        ProblemFilters.exclude[IncompatibleTemplateDefProblem](
          "org.apache.spark.serializer.DeserializationStream"),
        ProblemFilters.exclude[IncompatibleTemplateDefProblem](
          "org.apache.spark.serializer.Serializer"),
        ProblemFilters.exclude[IncompatibleTemplateDefProblem](
          "org.apache.spark.serializer.SerializationStream"),
        ProblemFilters.exclude[IncompatibleTemplateDefProblem](
          "org.apache.spark.serializer.SerializerInstance")
      ) ++
      Seq(
        // Renamed putValues -> putArray + putIterator
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.storage.MemoryStore.putValues"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.storage.DiskStore.putValues"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.storage.TachyonStore.putValues")
      ) ++
      Seq(
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.streaming.flume.FlumeReceiver.this"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.streaming.kafka.KafkaUtils.createStream"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.streaming.kafka.KafkaReceiver.this")
      ) ++
      Seq( // Ignore some private methods in ALS.
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$updateFeatures"),
        ProblemFilters.exclude[MissingMethodProblem]( // The only public constructor is the one without arguments.
          "org.apache.spark.mllib.recommendation.ALS.this"),
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$<init>$default$7"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$updateFeatures")
      ) ++
      MimaBuild.excludeSparkClass("mllib.linalg.distributed.ColumnStatisticsAggregator") ++
      MimaBuild.excludeSparkClass("rdd.ZippedRDD") ++
      MimaBuild.excludeSparkClass("rdd.ZippedPartition") ++
      MimaBuild.excludeSparkClass("util.SerializableHyperLogLog") ++
      MimaBuild.excludeSparkClass("storage.Values") ++
      MimaBuild.excludeSparkClass("storage.Entry") ++
      MimaBuild.excludeSparkClass("storage.MemoryStore$Entry") ++
      // Class was missing "@DeveloperApi" annotation in 1.0.
      MimaBuild.excludeSparkClass("scheduler.SparkListenerApplicationStart") ++
      Seq(
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.mllib.tree.impurity.Gini.calculate"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.mllib.tree.impurity.Entropy.calculate"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem](
          "org.apache.spark.mllib.tree.impurity.Variance.calculate")
      ) ++
      Seq( // Package-private classes removed in SPARK-2341
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.BinaryLabelParser"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.BinaryLabelParser$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.LabelParser"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.LabelParser$"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.MulticlassLabelParser"),
        ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.mllib.util.MulticlassLabelParser$")
      ) ++
      Seq( // package-private classes removed in MLlib
        ProblemFilters.exclude[MissingMethodProblem](
          "org.apache.spark.mllib.regression.GeneralizedLinearAlgorithm.org$apache$spark$mllib$regression$GeneralizedLinearAlgorithm$$prependOne")
      ) ++
      Seq( // new Vector methods in MLlib (binary compatible assuming users do not implement Vector)
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.copy")
      ) ++
      Seq( // synthetic methods generated in LabeledPoint
        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.mllib.regression.LabeledPoint$"),
        ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.regression.LabeledPoint.apply"),
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LabeledPoint.toString")
      ) ++
      Seq( // Scala 2.11 compatibility fix
        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.StreamingContext.<init>$default$2")
      )
    case v if v.startsWith("1.0") =>
      Seq(
        MimaBuild.excludeSparkPackage("api.java"),
        MimaBuild.excludeSparkPackage("mllib"),
        MimaBuild.excludeSparkPackage("streaming")
      ) ++
      MimaBuild.excludeSparkClass("rdd.ClassTags") ++
      MimaBuild.excludeSparkClass("util.XORShiftRandom") ++
      MimaBuild.excludeSparkClass("graphx.EdgeRDD") ++
      MimaBuild.excludeSparkClass("graphx.VertexRDD") ++
      MimaBuild.excludeSparkClass("graphx.impl.GraphImpl") ++
      MimaBuild.excludeSparkClass("graphx.impl.RoutingTable") ++
      MimaBuild.excludeSparkClass("graphx.util.collection.PrimitiveKeyOpenHashMap") ++
      MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap") ++
      MimaBuild.excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
      MimaBuild.excludeSparkClass("mllib.optimization.SquaredGradient") ++
      MimaBuild.excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
      MimaBuild.excludeSparkClass("mllib.regression.LassoWithSGD") ++
      MimaBuild.excludeSparkClass("mllib.regression.LinearRegressionWithSGD")

    case _ => Seq()
  }
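  // A usage sketch (hypothetical caller; the real wiring lives in the sbt
  // build): the list returned here is handed to MiMa as its problem filters.
  //   val filtersFor13 = MimaExcludes.excludes("1.3.0")
  //   val filtersForUnknown = MimaExcludes.excludes("0.9.0")   // Seq()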
}