From 36df86c0d058977f0f202abd0106881474f18f0e Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Tue, 24 Aug 2021 13:33:42 -0700 Subject: [PATCH] [SPARK-36564][CORE] Fix NullPointerException in LiveRDDDistribution.toApi ### What changes were proposed in this pull request? This PR fixes `NullPointerException` in `LiveRDDDistribution.toApi`. ### Why are the changes needed? Looking at the stack trace, the NPE is caused by the null `exec.hostPort`. I couldn't obtain the complete log to take a closer look, so I can only guess that it might be due to the event `SparkListenerBlockManagerAdded` being dropped or delivered out of order. ``` 21/08/23 12:26:29 ERROR AsyncEventQueue: Listener AppStatusListener threw an exception java.lang.NullPointerException at com.google.common.base.Preconditions.checkNotNull(Preconditions.java:192) at com.google.common.collect.MapMakerInternalMap.putIfAbsent(MapMakerInternalMap.java:3507) at com.google.common.collect.Interners$WeakInterner.intern(Interners.java:85) at org.apache.spark.status.LiveEntityHelpers$.weakIntern(LiveEntity.scala:696) at org.apache.spark.status.LiveRDDDistribution.toApi(LiveEntity.scala:563) at org.apache.spark.status.LiveRDD.$anonfun$doUpdate$4(LiveEntity.scala:629) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) at scala.collection.mutable.HashMap$$anon$2.$anonfun$foreach$3(HashMap.scala:158) at scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237) at scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44) at scala.collection.mutable.HashMap$$anon$2.foreach(HashMap.scala:158) at scala.collection.TraversableLike.map(TraversableLike.scala:238) at scala.collection.TraversableLike.map$(TraversableLike.scala:231) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at org.apache.spark.status.LiveRDD.doUpdate(LiveEntity.scala:629) at org.apache.spark.status.LiveEntity.write(LiveEntity.scala:51) at 
org.apache.spark.status.AppStatusListener.update(AppStatusListener.scala:1206) at org.apache.spark.status.AppStatusListener.maybeUpdate(AppStatusListener.scala:1212) at org.apache.spark.status.AppStatusListener.$anonfun$onExecutorMetricsUpdate$6(AppStatusListener.scala:956) ... ``` ### Does this PR introduce _any_ user-facing change? Yes, users will see the expected RDD info in UI instead of the NPE error. ### How was this patch tested? Pass existing tests. Closes #33812 from Ngone51/fix-hostport-npe. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun (cherry picked from commit d6c453aaea06327b37ab13b03a35a23a8225f010) Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/status/LiveEntity.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala index fc5fc32d64..b3dc3673b2 100644 --- a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala +++ b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala @@ -553,7 +553,7 @@ private class LiveRDDDistribution(exec: LiveExecutor) { def toApi(): v1.RDDDataDistribution = { if (lastUpdate == null) { lastUpdate = new v1.RDDDataDistribution( - weakIntern(exec.hostPort), + weakIntern(if (exec.hostPort != null) exec.hostPort else exec.host), memoryUsed, exec.maxMemory - exec.memoryUsed, diskUsed,