Merge pull request #225 from pwendell/dev
Log message which records RDD origin
This commit is contained in:
commit
9f6efbf06a
|
@ -61,6 +61,9 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
|
|||
/**
 * Produces the elements of the given split as an iterator.
 * Declared without a body in this abstract class, so concrete subclasses must implement it.
 */
def compute(split: Split): Iterator[T]
|
||||
// Dependencies of this RDD; declared without a value here, so subclasses must define it.
// Marked @transient, so it is excluded from serialization of the RDD.
@transient val dependencies: List[Dependency[_]]
|
||||
|
||||
// Record user function generating this RDD
|
||||
val origin = getOriginDescription
|
||||
|
||||
// Optionally overridden by subclasses to specify how they are partitioned
|
||||
val partitioner: Option[Partitioner] = None
|
||||
|
||||
|
@ -124,6 +127,38 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Describes which Spark and user functions generated this RDD, based on the current thread's
 * stack trace. Only works if called from the constructor.
 *
 * The trace is read from the innermost frame outward. The leading run of frames whose class
 * name contains "spark" (but does not start with "spark.examples") is treated as Spark code;
 * the last frame of that run supplies the Spark method name. This might be an RDD
 * transformation, a SparkContext function (such as parallelize), or anything else that leads
 * to instantiation of an RDD. The first frame after that run is treated as user code and
 * supplies the user method, file, and line.
 *
 * @return a string of the form "sparkMethod at: userMethod (file:line)"; parts that cannot be
 *         determined are reported as "<not_found>" (or -1 for the line)
 */
def getOriginDescription : String = {
  // Discard the frame(s) contributed by getStackTrace itself.
  val frames = Thread.currentThread().getStackTrace().filterNot(
    _.getMethodName().contains("getStackTrace"))

  // NOTE(review): this is a substring match, so any class name containing "spark" counts as
  // Spark code, not only classes in the spark package -- confirm this is intended.
  def inSpark(frame: StackTraceElement): Boolean = {
    val className = frame.getClassName()
    className.contains("spark") && !className.startsWith("spark.examples")
  }

  // Last (shallowest) frame of the contiguous Spark run at the top of the stack.
  val sparkMethod = frames.takeWhile(inSpark).lastOption match {
    case Some(frame) => frame.getMethodName()
    case None => "<not_found>"
  }

  // First (deepest) frame that is not considered Spark code.
  val (userMethod, userFile, userLine) = frames.find(frame => !inSpark(frame)) match {
    case Some(frame) => (frame.getMethodName(), frame.getFileName(), frame.getLineNumber())
    case None => ("<not_found>", "<not_found>", -1)
  }

  "%s at: %s (%s:%s)".format(sparkMethod, userMethod, userFile, userLine)
}
|
||||
|
||||
// Transformations (return a new RDD)
|
||||
|
||||
/** Returns a new MappedRDD built from this RDD and the function `f` after `sc.clean`. */
def map[U: ClassManifest](f: T => U): RDD[U] = {
  val cleanedFn = sc.clean(f)
  new MappedRDD(this, cleanedFn)
}
|
||||
|
|
|
@ -337,7 +337,8 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with
|
|||
val missing = getMissingParentStages(stage).sortBy(_.id)
|
||||
logDebug("missing: " + missing)
|
||||
if (missing == Nil) {
|
||||
logInfo("Submitting " + stage + ", which has no missing parents")
|
||||
logInfo("Submitting " + stage + " from " + stage.rdd.origin +
|
||||
", which has no missing parents")
|
||||
submitMissingTasks(stage)
|
||||
running += stage
|
||||
} else {
|
||||
|
@ -452,6 +453,8 @@ class DAGScheduler(taskSched: TaskScheduler) extends TaskSchedulerListener with
|
|||
waiting --= newlyRunnable
|
||||
running ++= newlyRunnable
|
||||
for (stage <- newlyRunnable.sortBy(_.id)) {
|
||||
logInfo("Submitting " + stage + " from " + stage.rdd.origin +
|
||||
" which is now runnable")
|
||||
submitMissingTasks(stage)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue