[SPARK-11836][SQL] udf/cast should not create new SQLContext

They should use the existing SQLContext.

Author: Davies Liu <davies@databricks.com>

Closes #9914 from davies/create_udf.

Commit: 1d91202010 (parent: 1b6e938be8)
@@ -346,9 +346,10 @@ class Column(object):
         if isinstance(dataType, basestring):
             jc = self._jc.cast(dataType)
         elif isinstance(dataType, DataType):
-            sc = SparkContext._active_spark_context
-            ssql_ctx = sc._jvm.SQLContext(sc._jsc.sc())
-            jdt = ssql_ctx.parseDataType(dataType.json())
+            from pyspark.sql import SQLContext
+            sc = SparkContext.getOrCreate()
+            ctx = SQLContext.getOrCreate(sc)
+            jdt = ctx._ssql_ctx.parseDataType(dataType.json())
             jc = self._jc.cast(jdt)
         else:
             raise TypeError("unexpected type: %s" % type(dataType))
@@ -1457,14 +1457,15 @@ class UserDefinedFunction(object):
         self._judf = self._create_judf(name)

     def _create_judf(self, name):
+        from pyspark.sql import SQLContext
         f, returnType = self.func, self.returnType  # put them in closure `func`
         func = lambda _, it: map(lambda x: returnType.toInternal(f(*x)), it)
         ser = AutoBatchedSerializer(PickleSerializer())
         command = (func, None, ser, ser)
-        sc = SparkContext._active_spark_context
+        sc = SparkContext.getOrCreate()
         pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command, self)
-        ssql_ctx = sc._jvm.SQLContext(sc._jsc.sc())
-        jdt = ssql_ctx.parseDataType(self.returnType.json())
+        ctx = SQLContext.getOrCreate(sc)
+        jdt = ctx._ssql_ctx.parseDataType(self.returnType.json())
         if name is None:
             name = f.__name__ if hasattr(f, '__name__') else f.__class__.__name__
         judf = sc._jvm.UserDefinedPythonFunction(name, bytearray(pickled_command), env, includes,