From 765106cb804008111c6783f288419cc76c95d0e4 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 21 Jun 2021 19:10:11 -0700 Subject: [PATCH] [SPARK-35699][K8S] Improve error message when creating k8s pod failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Improve the error message shown when a client uses the wrong master URL to submit a job to k8s. ### Why are the changes needed? The current error messages are not clear to users. ``` (base) ➜ spark git:(master) ./bin/spark-submit \ --master k8s://https://192.168.49.3:8443 \ --name spark-pi \ --class org.apache.spark.examples.SparkPi \ --conf spark.executor.instances=3 \ --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \ --conf spark.kubernetes.container.image=pingsutw/spark:testing \ local:///opt/spark/examples/jars/spark-examples_2.12-3.2.0-SNAPSHOT.jar 21/06/09 20:50:37 WARN Utils: Your hostname, kobe-pc resolves to a loopback address: 127.0.1.1; using 192.168.103.20 instead (on interface ens160) 21/06/09 20:50:37 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address 21/06/09 20:50:38 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties 21/06/09 20:50:38 INFO SparkKubernetesClientFactory: Auto-configuring K8S client using current context from users K8S config file 21/06/09 20:50:39 INFO KerberosConfDriverFeatureStep: You have not specified a krb5.conf file locally or via a ConfigMap. Make sure that you have the krb5.conf locally on the driver image. Exception in thread "main" io.fabric8.kubernetes.client.KubernetesClientException: Operation: [create] for kind: [Pod] with name: [null] in namespace: [default] failed. 
at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:64) at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:72) at io.fabric8.kubernetes.client.dsl.base.BaseOperation.create(BaseOperation.java:380) at io.fabric8.kubernetes.client.dsl.base.BaseOperation.create(BaseOperation.java:86) ``` Use the command below to reproduce: ``` ./bin/spark-submit \ --master k8s://https://192.168.49.2:8443 \ --deploy-mode cluster \ --name spark-pi \ --class org.apache.spark.examples.SparkPi \ --conf spark.executor.instances=3 \ --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \ --conf spark.kubernetes.container.image=pingsutw/spark:testing \ local:///opt/spark/examples/jars/spark-examples_2.12-3.2.0-SNAPSHOT.jar ``` ### Does this PR introduce _any_ user-facing change? Yes, users will see clearer error messages. ### How was this patch tested? Pass the CIs. Closes #32874 from pingsutw/SPARK-35699. 
Authored-by: Kevin Su Signed-off-by: Dongjoon Hyun --- .../deploy/k8s/submit/KubernetesClientApplication.scala | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala index d479d6688a..3140502a5a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala @@ -136,7 +136,14 @@ private[spark] class Client( val driverPodName = resolvedDriverPod.getMetadata.getName var watch: Watch = null - val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) + var createdDriverPod: Pod = null + try { + createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) + } catch { + case NonFatal(e) => + logError("Please check \"kubectl auth can-i create pod\" first. It should be yes.") + throw e + } try { val otherKubernetesResources = resolvedDriverSpec.driverKubernetesResources ++ Seq(configMap) addOwnerReference(createdDriverPod, otherKubernetesResources)