[SPARK-26349][PYSPARK] Forbid insecure py4j gateways

Spark always creates secure py4j connections between java and python,
but it also allows users to pass in their own connection. This ensures
that even passed in connections are secure.

Added test cases verifying the failure with a (mocked) insecure gateway.

This is closely related to SPARK-26019, but this entirely forbids the
insecure connection, rather than creating the "escape-hatch".

Closes #23441 from squito/SPARK-26349.

Authored-by: Imran Rashid <irashid@cloudera.com>
Signed-off-by: Bryan Cutler <cutlerb@gmail.com>
This commit is contained in:
Imran Rashid 2019-01-08 11:26:36 -08:00 committed by Bryan Cutler
parent e103c4a5e7
commit 32515d205a
2 changed files with 15 additions and 0 deletions

View file

@ -115,6 +115,11 @@ class SparkContext(object):
ValueError:...
"""
self._callsite = first_spark_call() or CallSite(None, None, None)
if gateway is not None and gateway.gateway_parameters.auth_token is None:
raise ValueError(
"You are trying to pass an insecure Py4j gateway to Spark. This"
" is not allowed as it is a security risk.")
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
try:
self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,

View file

@ -20,6 +20,7 @@ import tempfile
import threading
import time
import unittest
from collections import namedtuple
from pyspark import SparkFiles, SparkContext
from pyspark.testing.utils import ReusedPySparkTestCase, PySparkTestCase, QuietTest, SPARK_HOME
@ -246,6 +247,15 @@ class ContextTests(unittest.TestCase):
with SparkContext() as sc:
self.assertGreater(sc.startTime, 0)
def test_forbid_insecure_gateway(self):
# Fail immediately if you try to create a SparkContext
# with an insecure gateway
parameters = namedtuple('MockGatewayParameters', 'auth_token')(None)
mock_insecure_gateway = namedtuple('MockJavaGateway', 'gateway_parameters')(parameters)
with self.assertRaises(ValueError) as context:
SparkContext(gateway=mock_insecure_gateway)
self.assertIn("insecure Py4j gateway", str(context.exception))
if __name__ == "__main__":
from pyspark.tests.test_context import *