[SPARK-10620][SPARK-13054] Minor addendum to #10835
Additional changes to #10835, mainly related to style and visibility. This patch also adds back a few deprecated methods for backward compatibility.

Author: Andrew Or <andrew@databricks.com>

Closes #10958 from andrewor14/task-metrics-to-accums-followups.
commit eeaf45b926
parent ff0af0ddfa
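To make the backward-compatibility part concrete, here is a minimal, hypothetical caller-side sketch (not part of this patch) of pre-2.0 user code that keeps compiling against the deprecated TaskMetrics setters re-added below, now with deprecation warnings:

    import org.apache.spark.executor.TaskMetrics

    val metrics = new TaskMetrics       // no-arg constructor kept for Java tests
    metrics.updatedBlocks = None        // resolves to the re-added updatedBlocks_= setter
    metrics.outputMetrics = None        // resolves to the re-added outputMetrics_= setter
    metrics.shuffleWriteMetrics = None  // resolves to the re-added shuffleWriteMetrics_= setter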
@@ -60,19 +60,20 @@ import org.apache.spark.storage.{BlockId, BlockStatus}
  * @tparam T result type
  */
 class Accumulator[T] private[spark] (
-    @transient private[spark] val initialValue: T,
+    // SI-8813: This must explicitly be a private val, or else scala 2.11 doesn't compile
+    @transient private val initialValue: T,
     param: AccumulatorParam[T],
     name: Option[String],
     internal: Boolean,
-    override val countFailedValues: Boolean = false)
+    private[spark] override val countFailedValues: Boolean = false)
   extends Accumulable[T, T](initialValue, param, name, internal, countFailedValues) {
 
   def this(initialValue: T, param: AccumulatorParam[T], name: Option[String]) = {
-    this(initialValue, param, name, false)
+    this(initialValue, param, name, false /* internal */)
   }
 
   def this(initialValue: T, param: AccumulatorParam[T]) = {
-    this(initialValue, param, None, false)
+    this(initialValue, param, None, false /* internal */)
   }
 }
@@ -84,7 +85,7 @@ private[spark] object Accumulators extends Logging {
    * This global map holds the original accumulator objects that are created on the driver.
    * It keeps weak references to these objects so that accumulators can be garbage-collected
    * once the RDDs and user-code that reference them are cleaned up.
-   * TODO: Don't use a global map; these should be tied to a SparkContext at the very least.
+   * TODO: Don't use a global map; these should be tied to a SparkContext (SPARK-13051).
    */
   @GuardedBy("Accumulators")
   val originals = mutable.Map[Long, WeakReference[Accumulable[_, _]]]()
@@ -119,7 +119,7 @@ private[spark] object InternalAccumulator {
   /**
    * Accumulators for tracking internal metrics.
    */
-  def create(): Seq[Accumulator[_]] = {
+  def createAll(): Seq[Accumulator[_]] = {
     Seq[String](
       EXECUTOR_DESERIALIZE_TIME,
       EXECUTOR_RUN_TIME,
@@ -188,7 +188,7 @@ private[spark] object InternalAccumulator {
    * values across all tasks within each stage.
    */
   def create(sc: SparkContext): Seq[Accumulator[_]] = {
-    val accums = create()
+    val accums = createAll()
     accums.foreach { accum =>
       Accumulators.register(accum)
       sc.cleaner.foreach(_.registerAccumulatorForCleanup(accum))
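As a side note on the create/createAll split shown above, a rough usage sketch of the two (private[spark]) entry points, assuming the signatures in these hunks and a SparkContext named sc in scope:

    // createAll() only constructs the per-task internal accumulators;
    // create(sc) constructs them, registers them with Accumulators, and
    // registers them with the ContextCleaner for cleanup.
    val perTask: Seq[Accumulator[_]] = InternalAccumulator.createAll()
    val perStage: Seq[Accumulator[_]] = InternalAccumulator.create(sc)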
@@ -32,7 +32,7 @@ private[spark] class TaskContextImpl(
     override val attemptNumber: Int,
     override val taskMemoryManager: TaskMemoryManager,
     @transient private val metricsSystem: MetricsSystem,
-    initialAccumulators: Seq[Accumulator[_]] = InternalAccumulator.create())
+    initialAccumulators: Seq[Accumulator[_]] = InternalAccumulator.createAll())
   extends TaskContext
   with Logging {
 
@@ -118,7 +118,7 @@ case class ExceptionFailure(
     description: String,
     stackTrace: Array[StackTraceElement],
     fullStackTrace: String,
-    exceptionWrapper: Option[ThrowableSerializationWrapper],
+    private val exceptionWrapper: Option[ThrowableSerializationWrapper],
     accumUpdates: Seq[AccumulableInfo] = Seq.empty[AccumulableInfo])
   extends TaskFailedReason {
 
@@ -300,15 +300,15 @@ private[spark] class Executor(
 
         // Collect latest accumulator values to report back to the driver
         val accumulatorUpdates: Seq[AccumulableInfo] =
-        if (task != null) {
-          task.metrics.foreach { m =>
-            m.setExecutorRunTime(System.currentTimeMillis() - taskStart)
-            m.setJvmGCTime(computeTotalGcTime() - startGCTime)
+          if (task != null) {
+            task.metrics.foreach { m =>
+              m.setExecutorRunTime(System.currentTimeMillis() - taskStart)
+              m.setJvmGCTime(computeTotalGcTime() - startGCTime)
             }
-          task.collectAccumulatorUpdates(taskFailed = true)
-        } else {
-          Seq.empty[AccumulableInfo]
-        }
+            task.collectAccumulatorUpdates(taskFailed = true)
+          } else {
+            Seq.empty[AccumulableInfo]
+          }
 
         val serializedTaskEndReason = {
           try {
@@ -45,13 +45,12 @@ import org.apache.spark.storage.{BlockId, BlockStatus}
  * these requirements.
  */
 @DeveloperApi
-class TaskMetrics(initialAccums: Seq[Accumulator[_]]) extends Serializable {
-
+class TaskMetrics private[spark] (initialAccums: Seq[Accumulator[_]]) extends Serializable {
   import InternalAccumulator._
 
   // Needed for Java tests
   def this() {
-    this(InternalAccumulator.create())
+    this(InternalAccumulator.createAll())
   }
 
   /**
@@ -144,6 +143,11 @@ class TaskMetrics(initialAccums: Seq[Accumulator[_]]) extends Serializable {
     if (updatedBlockStatuses.nonEmpty) Some(updatedBlockStatuses) else None
   }
 
+  @deprecated("setting updated blocks is not allowed", "2.0.0")
+  def updatedBlocks_=(blocks: Option[Seq[(BlockId, BlockStatus)]]): Unit = {
+    blocks.foreach(setUpdatedBlockStatuses)
+  }
+
   // Setters and increment-ers
   private[spark] def setExecutorDeserializeTime(v: Long): Unit =
     _executorDeserializeTime.setValue(v)
@@ -220,6 +224,11 @@ class TaskMetrics(initialAccums: Seq[Accumulator[_]]) extends Serializable {
    */
   def outputMetrics: Option[OutputMetrics] = _outputMetrics
 
+  @deprecated("setting OutputMetrics is for internal use only", "2.0.0")
+  def outputMetrics_=(om: Option[OutputMetrics]): Unit = {
+    _outputMetrics = om
+  }
+
   /**
    * Get or create a new [[OutputMetrics]] associated with this task.
    */
@@ -296,6 +305,11 @@ class TaskMetrics(initialAccums: Seq[Accumulator[_]]) extends Serializable {
    */
   def shuffleWriteMetrics: Option[ShuffleWriteMetrics] = _shuffleWriteMetrics
 
+  @deprecated("setting ShuffleWriteMetrics is for internal use only", "2.0.0")
+  def shuffleWriteMetrics_=(swm: Option[ShuffleWriteMetrics]): Unit = {
+    _shuffleWriteMetrics = swm
+  }
+
   /**
    * Get or create a new [[ShuffleWriteMetrics]] associated with this task.
    */
@@ -49,7 +49,7 @@ private[spark] class ResultTask[T, U](
     partition: Partition,
     locs: Seq[TaskLocation],
     val outputId: Int,
-    _initialAccums: Seq[Accumulator[_]] = InternalAccumulator.create())
+    _initialAccums: Seq[Accumulator[_]] = InternalAccumulator.createAll())
   extends Task[U](stageId, stageAttemptId, partition.index, _initialAccums)
   with Serializable {
 
@@ -61,6 +61,7 @@ case class SparkListenerTaskEnd(
     taskType: String,
     reason: TaskEndReason,
     taskInfo: TaskInfo,
+    // may be null if the task has failed
     @Nullable taskMetrics: TaskMetrics)
   extends SparkListenerEvent
 
@@ -408,9 +408,9 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
           </td> +:
           getFormattedTimeQuantiles(gettingResultTimes)
 
-          val peakExecutionMemory = validTasks.map { case TaskUIData(_, metrics, _) =>
-            metrics.get.peakExecutionMemory.toDouble
-          }
+        val peakExecutionMemory = validTasks.map { case TaskUIData(_, metrics, _) =>
+          metrics.get.peakExecutionMemory.toDouble
+        }
         val peakExecutionMemoryQuantiles = {
           <td>
             <span data-toggle="tooltip"
@@ -236,7 +236,7 @@ private[spark] object JsonProtocol {
     val accumUpdates = metricsUpdate.accumUpdates
     ("Event" -> Utils.getFormattedClassName(metricsUpdate)) ~
     ("Executor ID" -> execId) ~
-      ("Metrics Updated" -> accumUpdates.map { case (taskId, stageId, stageAttemptId, updates) =>
+    ("Metrics Updated" -> accumUpdates.map { case (taskId, stageId, stageAttemptId, updates) =>
       ("Task ID" -> taskId) ~
       ("Stage ID" -> stageId) ~
       ("Stage Attempt ID" -> stageAttemptId) ~
@@ -268,7 +268,7 @@ class AccumulatorSuite extends SparkFunSuite with Matchers with LocalSparkContex
     val acc1 = new Accumulator(0, IntAccumulatorParam, Some("thing"), internal = false)
     val acc2 = new Accumulator(0L, LongAccumulatorParam, Some("thing2"), internal = false)
     val externalAccums = Seq(acc1, acc2)
-    val internalAccums = InternalAccumulator.create()
+    val internalAccums = InternalAccumulator.createAll()
     // Set some values; these should not be observed later on the "executors"
     acc1.setValue(10)
     acc2.setValue(20L)
@@ -87,7 +87,7 @@ class InternalAccumulatorSuite extends SparkFunSuite with LocalSparkContext {
   }
 
   test("create") {
-    val accums = create()
+    val accums = createAll()
     val shuffleReadAccums = createShuffleReadAccums()
     val shuffleWriteAccums = createShuffleWriteAccums()
     val inputAccums = createInputAccums()
@@ -123,7 +123,7 @@ class InternalAccumulatorSuite extends SparkFunSuite with LocalSparkContext {
   }
 
   test("naming") {
-    val accums = create()
+    val accums = createAll()
     val shuffleReadAccums = createShuffleReadAccums()
     val shuffleWriteAccums = createShuffleWriteAccums()
     val inputAccums = createInputAccums()
@@ -291,7 +291,7 @@ class InternalAccumulatorSuite extends SparkFunSuite with LocalSparkContext {
       }
       assert(Accumulators.originals.isEmpty)
       sc.parallelize(1 to 100).map { i => (i, i) }.reduceByKey { _ + _ }.count()
-      val internalAccums = InternalAccumulator.create()
+      val internalAccums = InternalAccumulator.createAll()
       // We ran 2 stages, so we should have 2 sets of internal accumulators, 1 for each stage
       assert(Accumulators.originals.size === internalAccums.size * 2)
       val accumsRegistered = sc.cleaner match {
@@ -31,7 +31,7 @@ class TaskMetricsSuite extends SparkFunSuite {
   import TaskMetricsSuite._
 
   test("create") {
-    val internalAccums = InternalAccumulator.create()
+    val internalAccums = InternalAccumulator.createAll()
     val tm1 = new TaskMetrics
     val tm2 = new TaskMetrics(internalAccums)
     assert(tm1.accumulatorUpdates().size === internalAccums.size)
@@ -51,7 +51,7 @@ class TaskMetricsSuite extends SparkFunSuite {
   test("create with unnamed accum") {
     intercept[IllegalArgumentException] {
       new TaskMetrics(
-        InternalAccumulator.create() ++ Seq(
+        InternalAccumulator.createAll() ++ Seq(
           new Accumulator(0, IntAccumulatorParam, None, internal = true)))
     }
   }
@@ -59,7 +59,7 @@ class TaskMetricsSuite extends SparkFunSuite {
   test("create with duplicate name accum") {
     intercept[IllegalArgumentException] {
       new TaskMetrics(
-        InternalAccumulator.create() ++ Seq(
+        InternalAccumulator.createAll() ++ Seq(
           new Accumulator(0, IntAccumulatorParam, Some(RESULT_SIZE), internal = true)))
     }
   }
@@ -67,7 +67,7 @@ class TaskMetricsSuite extends SparkFunSuite {
   test("create with external accum") {
     intercept[IllegalArgumentException] {
       new TaskMetrics(
-        InternalAccumulator.create() ++ Seq(
+        InternalAccumulator.createAll() ++ Seq(
           new Accumulator(0, IntAccumulatorParam, Some("x"))))
     }
   }
@@ -131,7 +131,7 @@ class TaskMetricsSuite extends SparkFunSuite {
   }
 
   test("mutating values") {
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val tm = new TaskMetrics(accums)
     // initial values
     assertValueEquals(tm, _.executorDeserializeTime, accums, EXECUTOR_DESERIALIZE_TIME, 0L)
@@ -180,7 +180,7 @@ class TaskMetricsSuite extends SparkFunSuite {
 
   test("mutating shuffle read metrics values") {
     import shuffleRead._
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val tm = new TaskMetrics(accums)
     def assertValEquals[T](tmValue: ShuffleReadMetrics => T, name: String, value: T): Unit = {
       assertValueEquals(tm, tm => tmValue(tm.shuffleReadMetrics.get), accums, name, value)
@@ -234,7 +234,7 @@ class TaskMetricsSuite extends SparkFunSuite {
 
   test("mutating shuffle write metrics values") {
     import shuffleWrite._
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val tm = new TaskMetrics(accums)
     def assertValEquals[T](tmValue: ShuffleWriteMetrics => T, name: String, value: T): Unit = {
       assertValueEquals(tm, tm => tmValue(tm.shuffleWriteMetrics.get), accums, name, value)
@@ -267,7 +267,7 @@ class TaskMetricsSuite extends SparkFunSuite {
 
   test("mutating input metrics values") {
     import input._
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val tm = new TaskMetrics(accums)
     def assertValEquals(tmValue: InputMetrics => Any, name: String, value: Any): Unit = {
       assertValueEquals(tm, tm => tmValue(tm.inputMetrics.get), accums, name, value,
@@ -296,7 +296,7 @@ class TaskMetricsSuite extends SparkFunSuite {
 
   test("mutating output metrics values") {
     import output._
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val tm = new TaskMetrics(accums)
     def assertValEquals(tmValue: OutputMetrics => Any, name: String, value: Any): Unit = {
       assertValueEquals(tm, tm => tmValue(tm.outputMetrics.get), accums, name, value,
@@ -381,7 +381,7 @@ class TaskMetricsSuite extends SparkFunSuite {
   }
 
   test("additional accumulables") {
-    val internalAccums = InternalAccumulator.create()
+    val internalAccums = InternalAccumulator.createAll()
     val tm = new TaskMetrics(internalAccums)
     assert(tm.accumulatorUpdates().size === internalAccums.size)
     val acc1 = new Accumulator(0, IntAccumulatorParam, Some("a"))
@@ -419,7 +419,7 @@ class TaskMetricsSuite extends SparkFunSuite {
 
   test("existing values in shuffle read accums") {
     // set shuffle read accum before passing it into TaskMetrics
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val srAccum = accums.find(_.name === Some(shuffleRead.FETCH_WAIT_TIME))
     assert(srAccum.isDefined)
     srAccum.get.asInstanceOf[Accumulator[Long]] += 10L
@@ -432,7 +432,7 @@ class TaskMetricsSuite extends SparkFunSuite {
 
   test("existing values in shuffle write accums") {
     // set shuffle write accum before passing it into TaskMetrics
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val swAccum = accums.find(_.name === Some(shuffleWrite.RECORDS_WRITTEN))
     assert(swAccum.isDefined)
     swAccum.get.asInstanceOf[Accumulator[Long]] += 10L
@@ -445,7 +445,7 @@ class TaskMetricsSuite extends SparkFunSuite {
 
   test("existing values in input accums") {
     // set input accum before passing it into TaskMetrics
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val inAccum = accums.find(_.name === Some(input.RECORDS_READ))
     assert(inAccum.isDefined)
     inAccum.get.asInstanceOf[Accumulator[Long]] += 10L
@@ -458,7 +458,7 @@ class TaskMetricsSuite extends SparkFunSuite {
 
   test("existing values in output accums") {
     // set output accum before passing it into TaskMetrics
-    val accums = InternalAccumulator.create()
+    val accums = InternalAccumulator.createAll()
     val outAccum = accums.find(_.name === Some(output.RECORDS_WRITTEN))
     assert(outAccum.isDefined)
     outAccum.get.asInstanceOf[Accumulator[Long]] += 10L
@@ -470,7 +470,7 @@ class TaskMetricsSuite extends SparkFunSuite {
   }
 
   test("from accumulator updates") {
-    val accumUpdates1 = InternalAccumulator.create().map { a =>
+    val accumUpdates1 = InternalAccumulator.createAll().map { a =>
       AccumulableInfo(a.id, a.name, Some(3L), None, a.isInternal, a.countFailedValues)
     }
     val metrics1 = TaskMetrics.fromAccumulatorUpdates(accumUpdates1)
@@ -127,7 +127,7 @@ class TaskContextSuite extends SparkFunSuite with BeforeAndAfter with LocalSpark
     val param = AccumulatorParam.LongAccumulatorParam
     val acc1 = new Accumulator(0L, param, Some("x"), internal = false, countFailedValues = true)
     val acc2 = new Accumulator(0L, param, Some("y"), internal = false, countFailedValues = false)
-    val initialAccums = InternalAccumulator.create()
+    val initialAccums = InternalAccumulator.createAll()
     // Create a dummy task. We won't end up running this; we just want to collect
     // accumulator updates from it.
     val task = new Task[Int](0, 0, 0, Seq.empty[Accumulator[_]]) {
@@ -269,7 +269,7 @@ class JobProgressListenerSuite extends SparkFunSuite with LocalSparkContext with
     val execId = "exe-1"
 
     def makeTaskMetrics(base: Int): TaskMetrics = {
-      val accums = InternalAccumulator.create()
+      val accums = InternalAccumulator.createAll()
       accums.foreach(Accumulators.register)
       val taskMetrics = new TaskMetrics(accums)
       val shuffleReadMetrics = taskMetrics.registerTempShuffleReadMetrics()
@@ -508,7 +508,7 @@ private[spark] object JsonProtocolSuite extends Assertions {
 
   /** -------------------------------- *
    | Util methods for comparing events |
-  * --------------------------------- */
+   * --------------------------------- */
 
   private[spark] def assertEquals(event1: SparkListenerEvent, event2: SparkListenerEvent) {
     (event1, event2) match {
@@ -773,7 +773,7 @@ private[spark] object JsonProtocolSuite extends Assertions {
 
   /** ----------------------------------- *
    | Util methods for constructing events |
-  * ------------------------------------ */
+   * ------------------------------------ */
 
   private val properties = {
     val p = new Properties
@@ -187,7 +187,8 @@ object MimaExcludes {
     ) ++ Seq(
       // SPARK-12896 Send only accumulator updates to driver, not TaskMetrics
      ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.Accumulable.this"),
-      ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.Accumulator.this")
+      ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.Accumulator.this"),
+      ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.Accumulator.initialValue")
     ) ++ Seq(
       // SPARK-12692 Scala style: Fix the style violation (Space before "," or ":")
       ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.SparkSink.org$apache$spark$streaming$flume$sink$Logging$$log_"),