[SPARK-23825][K8S] Requesting memory + memory overhead for pod memory

## What changes were proposed in this pull request?

Kubernetes driver and executor pods should request `memory + memoryOverhead` as their memory resource instead of just `memory`. Previously the overhead was reflected only in the memory limit, so the scheduler could place a pod on a node without room for the overhead. See https://issues.apache.org/jira/browse/SPARK-23825.
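For illustration, a minimal sketch (not the PR's code; the object name and values are hypothetical) of how the fabric8 builders shown in the diff below produce a container whose memory request and limit both equal heap plus overhead:

```scala
import io.fabric8.kubernetes.api.model.{ContainerBuilder, QuantityBuilder}

object MemoryRequestSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical values: 1Gi heap plus Spark's default overhead of
    // max(0.10 * memory, 384MiB), as referenced by the test comments below.
    val memoryMiB = 1024L
    val overheadMiB = math.max((0.10 * memoryMiB).toLong, 384L)
    val memoryWithOverheadMiB = memoryMiB + overheadMiB

    val memoryQuantity = new QuantityBuilder(false)
      .withAmount(s"${memoryWithOverheadMiB}Mi")
      .build()

    // Request and limit now use the same quantity, so the scheduler reserves
    // room for the JVM overhead instead of only capping it at the limit.
    val container = new ContainerBuilder()
      .withName("spark-kubernetes-driver")
      .withNewResources()
        .addToRequests("memory", memoryQuantity)
        .addToLimits("memory", memoryQuantity)
        .endResources()
      .build()

    println(container.getResources.getRequests.get("memory").getAmount) // 1408Mi
  }
}
```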

## How was this patch tested?

Existing unit tests were adapted.

Author: David Vogelbacher <dvogelbacher@palantir.com>

Closes #20943 from dvogelbacher/spark-23825.
Authored by David Vogelbacher on 2018-04-02 12:00:37 -07:00; committed by mcheah
parent 44a9f8e6e8, commit 6151f29f9f
4 changed files with 7 additions and 11 deletions

BasicDriverConfigurationStep.scala

@@ -93,9 +93,6 @@ private[spark] class BasicDriverConfigurationStep(
       .withAmount(driverCpuCores)
       .build()
     val driverMemoryQuantity = new QuantityBuilder(false)
-      .withAmount(s"${driverMemoryMiB}Mi")
-      .build()
-    val driverMemoryLimitQuantity = new QuantityBuilder(false)
       .withAmount(s"${driverMemoryWithOverheadMiB}Mi")
       .build()
     val maybeCpuLimitQuantity = driverLimitCores.map { limitCores =>
@@ -117,7 +114,7 @@ private[spark] class BasicDriverConfigurationStep(
       .withNewResources()
         .addToRequests("cpu", driverCpuQuantity)
         .addToRequests("memory", driverMemoryQuantity)
-        .addToLimits("memory", driverMemoryLimitQuantity)
+        .addToLimits("memory", driverMemoryQuantity)
         .addToLimits(maybeCpuLimitQuantity.toMap.asJava)
       .endResources()
       .addToArgs("driver")

ExecutorPodFactory.scala

@@ -108,9 +108,6 @@ private[spark] class ExecutorPodFactory(
       SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++
       executorLabels
     val executorMemoryQuantity = new QuantityBuilder(false)
-      .withAmount(s"${executorMemoryMiB}Mi")
-      .build()
-    val executorMemoryLimitQuantity = new QuantityBuilder(false)
       .withAmount(s"${executorMemoryWithOverhead}Mi")
       .build()
     val executorCpuQuantity = new QuantityBuilder(false)
@@ -167,7 +164,7 @@ private[spark] class ExecutorPodFactory(
       .withImagePullPolicy(imagePullPolicy)
       .withNewResources()
         .addToRequests("memory", executorMemoryQuantity)
-        .addToLimits("memory", executorMemoryLimitQuantity)
+        .addToLimits("memory", executorMemoryQuantity)
         .addToRequests("cpu", executorCpuQuantity)
       .endResources()
       .addAllToEnv(executorEnv.asJava)

BasicDriverConfigurationStepSuite.scala

@@ -91,7 +91,7 @@ class BasicDriverConfigurationStepSuite extends SparkFunSuite {
     val resourceRequirements = preparedDriverSpec.driverContainer.getResources
     val requests = resourceRequirements.getRequests.asScala
     assert(requests("cpu").getAmount === "2")
-    assert(requests("memory").getAmount === "256Mi")
+    assert(requests("memory").getAmount === "456Mi")
     val limits = resourceRequirements.getLimits.asScala
     assert(limits("memory").getAmount === "456Mi")
     assert(limits("cpu").getAmount === "4")

ExecutorPodFactorySuite.scala

@@ -66,12 +66,14 @@ class ExecutorPodFactorySuite extends SparkFunSuite with BeforeAndAfter with Bef
     assert(executor.getMetadata.getLabels.size() === 3)
     assert(executor.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL) === "1")
 
-    // There is exactly 1 container with no volume mounts and default memory limits.
-    // Default memory limit is 1024M + 384M (minimum overhead constant).
+    // There is exactly 1 container with no volume mounts and default memory limits and requests.
+    // Default memory limit/request is 1024M + 384M (minimum overhead constant).
     assert(executor.getSpec.getContainers.size() === 1)
     assert(executor.getSpec.getContainers.get(0).getImage === executorImage)
     assert(executor.getSpec.getContainers.get(0).getVolumeMounts.isEmpty)
     assert(executor.getSpec.getContainers.get(0).getResources.getLimits.size() === 1)
+    assert(executor.getSpec.getContainers.get(0).getResources
+      .getRequests.get("memory").getAmount === "1408Mi")
     assert(executor.getSpec.getContainers.get(0).getResources
       .getLimits.get("memory").getAmount === "1408Mi")