[SPARK-33155][K8S] spark.kubernetes.pyspark.pythonVersion allows only '3'

### What changes were proposed in this pull request?

This PR makes `spark.kubernetes.pyspark.pythonVersion` allow only `3`. In other words, it will reject `2` for `Python 2`.
- [x] Configuration description and check is updated.
- [x] Documentation is updated
- [x] Unit test cases are updated.
- [x] Docker image script is updated.

### Why are the changes needed?

After SPARK-32138, Apache Spark 3.1 dropped Python 2 support.

### Does this PR introduce _any_ user-facing change?

Yes, but Python 2 support has already been officially dropped.

### How was this patch tested?

Pass the CI.

Closes #30049 from dongjoon-hyun/SPARK-DROP-PYTHON2.

Authored-by: Dongjoon Hyun <dhyun@apple.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Dongjoon Hyun 2020-10-15 01:51:01 -07:00
parent 77a8efbc05
commit 8e7c39089f
7 changed files with 8 additions and 18 deletions

View file

@@ -1087,7 +1087,7 @@ See the [configuration page](configuration.html) for information on Spark config
<td><code>spark.kubernetes.pyspark.pythonVersion</code></td>
<td><code>"3"</code></td>
<td>
This sets the major Python version of the docker image used to run the driver and executor containers. Can either be 2 or 3.
This sets the major Python version of the docker image used to run the driver and executor containers. Can be 3.
</td>
<td>2.4.0</td>
</tr>

View file

@@ -285,11 +285,11 @@ private[spark] object Config extends Logging {
val PYSPARK_MAJOR_PYTHON_VERSION =
ConfigBuilder("spark.kubernetes.pyspark.pythonVersion")
.doc("This sets the major Python version. Either 2 or 3. (Python2 or Python3)")
.doc("This sets the major Python version. Only 3 is available for Python3.")
.version("2.4.0")
.stringConf
.checkValue(pv => List("2", "3").contains(pv),
"Ensure that major Python version is either Python2 or Python3")
.checkValue(pv => List("3").contains(pv),
"Ensure that major Python version is Python3")
.createWithDefault("3")
val KUBERNETES_KERBEROS_KRB5_FILE =

View file

@@ -43,7 +43,6 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite {
test("python resource") {
val mainResource = "local:/main.py"
val sparkConf = new SparkConf(false)
.set(PYSPARK_MAJOR_PYTHON_VERSION, "2")
val spec = applyFeatureStep(
PythonMainAppResource(mainResource),
conf = sparkConf,
@@ -58,7 +57,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite {
val envs = spec.pod.container.getEnv.asScala
.map { env => (env.getName, env.getValue) }
.toMap
val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "2")
val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "3")
assert(envs === expected)
}
@@ -93,7 +92,6 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite {
test("SPARK-25355: python resource args with proxy-user") {
val mainResource = "local:/main.py"
val sparkConf = new SparkConf(false)
.set(PYSPARK_MAJOR_PYTHON_VERSION, "2")
val spec = applyFeatureStep(
PythonMainAppResource(mainResource),
conf = sparkConf,

View file

@@ -44,12 +44,7 @@ if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi
if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "2" ]; then
pyv="$(python -V 2>&1)"
export PYTHON_VERSION="${pyv:7}"
export PYSPARK_PYTHON="python"
export PYSPARK_DRIVER_PYTHON="python"
elif [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then
if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then
pyv3="$(python3 -V 2>&1)"
export PYTHON_VERSION="${pyv3:7}"
export PYSPARK_PYTHON="python3"

View file

@@ -26,7 +26,6 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite =>
test("Test basic decommissioning", k8sTestTag) {
sparkAppConf
.set(config.DECOMMISSION_ENABLED.key, "true")
.set("spark.kubernetes.pyspark.pythonVersion", "3")
.set("spark.kubernetes.container.image", pyImage)
.set(config.STORAGE_DECOMMISSION_ENABLED.key, "true")
.set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key, "true")

View file

@@ -35,10 +35,9 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite =>
isJVM = false)
}
test("Run PySpark with Python3 to test a pyfiles example", k8sTestTag) {
test("Run PySpark to test a pyfiles example", k8sTestTag) {
sparkAppConf
.set("spark.kubernetes.container.image", pyImage)
.set("spark.kubernetes.pyspark.pythonVersion", "3")
runSparkApplicationAndVerifyCompletion(
appResource = PYSPARK_FILES,
mainClass = "",
@@ -57,7 +56,6 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite =>
test("Run PySpark with memory customization", k8sTestTag) {
sparkAppConf
.set("spark.kubernetes.container.image", pyImage)
.set("spark.kubernetes.pyspark.pythonVersion", "3")
.set("spark.kubernetes.memoryOverheadFactor", s"$memOverheadConstant")
.set("spark.executor.pyspark.memory", s"${additionalMemory}m")
runSparkApplicationAndVerifyCompletion(

View file

@@ -31,7 +31,7 @@ if __name__ == "__main__":
from py_container_checks import version_check
# Begin of Python container checks
version_check(sys.argv[1], 2 if sys.argv[1] == "python" else 3)
version_check(sys.argv[1], 3)
# Check python executable at executors
spark.udf.register("get_sys_ver",