[SPARK-33155][K8S] spark.kubernetes.pyspark.pythonVersion allows only '3'
### What changes were proposed in this pull request?

This PR makes `spark.kubernetes.pyspark.pythonVersion` allow only `3`. In other words, it will reject `2` for `Python 2`.

- [x] Configuration description and check is updated.
- [x] Documentation is updated.
- [x] Unit test cases are updated.
- [x] Docker image script is updated.

### Why are the changes needed?

After SPARK-32138, Apache Spark 3.1 dropped Python 2 support.

### Does this PR introduce _any_ user-facing change?

Yes, but Python 2 support is already dropped officially.

### How was this patch tested?

Pass the CI.

Closes #30049 from dongjoon-hyun/SPARK-DROP-PYTHON2.

Authored-by: Dongjoon Hyun <dhyun@apple.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
77a8efbc05
commit
8e7c39089f
|
@@ -1087,7 +1087,7 @@ See the [configuration page](configuration.html) for information on Spark config
|
|||
<td><code>spark.kubernetes.pyspark.pythonVersion</code></td>
|
||||
<td><code>"3"</code></td>
|
||||
<td>
|
||||
This sets the major Python version of the docker image used to run the driver and executor containers. Can either be 2 or 3.
|
||||
This sets the major Python version of the docker image used to run the driver and executor containers. Can be 3.
|
||||
</td>
|
||||
<td>2.4.0</td>
|
||||
</tr>
|
||||
|
|
|
@@ -285,11 +285,11 @@ private[spark] object Config extends Logging {
|
|||
|
||||
val PYSPARK_MAJOR_PYTHON_VERSION =
|
||||
ConfigBuilder("spark.kubernetes.pyspark.pythonVersion")
|
||||
.doc("This sets the major Python version. Either 2 or 3. (Python2 or Python3)")
|
||||
.doc("This sets the major Python version. Only 3 is available for Python3.")
|
||||
.version("2.4.0")
|
||||
.stringConf
|
||||
.checkValue(pv => List("2", "3").contains(pv),
|
||||
"Ensure that major Python version is either Python2 or Python3")
|
||||
.checkValue(pv => List("3").contains(pv),
|
||||
"Ensure that major Python version is Python3")
|
||||
.createWithDefault("3")
|
||||
|
||||
val KUBERNETES_KERBEROS_KRB5_FILE =
|
||||
|
|
|
@@ -43,7 +43,6 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite {
|
|||
test("python resource") {
|
||||
val mainResource = "local:/main.py"
|
||||
val sparkConf = new SparkConf(false)
|
||||
.set(PYSPARK_MAJOR_PYTHON_VERSION, "2")
|
||||
val spec = applyFeatureStep(
|
||||
PythonMainAppResource(mainResource),
|
||||
conf = sparkConf,
|
||||
|
@@ -58,7 +57,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite {
|
|||
val envs = spec.pod.container.getEnv.asScala
|
||||
.map { env => (env.getName, env.getValue) }
|
||||
.toMap
|
||||
val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "2")
|
||||
val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "3")
|
||||
assert(envs === expected)
|
||||
}
|
||||
|
||||
|
@@ -93,7 +92,6 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite {
|
|||
test("SPARK-25355: python resource args with proxy-user") {
|
||||
val mainResource = "local:/main.py"
|
||||
val sparkConf = new SparkConf(false)
|
||||
.set(PYSPARK_MAJOR_PYTHON_VERSION, "2")
|
||||
val spec = applyFeatureStep(
|
||||
PythonMainAppResource(mainResource),
|
||||
conf = sparkConf,
|
||||
|
|
|
@@ -44,12 +44,7 @@ if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
|
|||
SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
|
||||
fi
|
||||
|
||||
if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "2" ]; then
|
||||
pyv="$(python -V 2>&1)"
|
||||
export PYTHON_VERSION="${pyv:7}"
|
||||
export PYSPARK_PYTHON="python"
|
||||
export PYSPARK_DRIVER_PYTHON="python"
|
||||
elif [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then
|
||||
if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then
|
||||
pyv3="$(python3 -V 2>&1)"
|
||||
export PYTHON_VERSION="${pyv3:7}"
|
||||
export PYSPARK_PYTHON="python3"
|
||||
|
|
|
@@ -26,7 +26,6 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite =>
|
|||
test("Test basic decommissioning", k8sTestTag) {
|
||||
sparkAppConf
|
||||
.set(config.DECOMMISSION_ENABLED.key, "true")
|
||||
.set("spark.kubernetes.pyspark.pythonVersion", "3")
|
||||
.set("spark.kubernetes.container.image", pyImage)
|
||||
.set(config.STORAGE_DECOMMISSION_ENABLED.key, "true")
|
||||
.set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key, "true")
|
||||
|
|
|
@@ -35,10 +35,9 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite =>
|
|||
isJVM = false)
|
||||
}
|
||||
|
||||
test("Run PySpark with Python3 to test a pyfiles example", k8sTestTag) {
|
||||
test("Run PySpark to test a pyfiles example", k8sTestTag) {
|
||||
sparkAppConf
|
||||
.set("spark.kubernetes.container.image", pyImage)
|
||||
.set("spark.kubernetes.pyspark.pythonVersion", "3")
|
||||
runSparkApplicationAndVerifyCompletion(
|
||||
appResource = PYSPARK_FILES,
|
||||
mainClass = "",
|
||||
|
@@ -57,7 +56,6 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite =>
|
|||
test("Run PySpark with memory customization", k8sTestTag) {
|
||||
sparkAppConf
|
||||
.set("spark.kubernetes.container.image", pyImage)
|
||||
.set("spark.kubernetes.pyspark.pythonVersion", "3")
|
||||
.set("spark.kubernetes.memoryOverheadFactor", s"$memOverheadConstant")
|
||||
.set("spark.executor.pyspark.memory", s"${additionalMemory}m")
|
||||
runSparkApplicationAndVerifyCompletion(
|
||||
|
|
|
@@ -31,7 +31,7 @@ if __name__ == "__main__":
|
|||
|
||||
from py_container_checks import version_check
|
||||
# Begin of Python container checks
|
||||
version_check(sys.argv[1], 2 if sys.argv[1] == "python" else 3)
|
||||
version_check(sys.argv[1], 3)
|
||||
|
||||
# Check python executable at executors
|
||||
spark.udf.register("get_sys_ver",
|
||||
|
|
Loading…
Reference in a new issue