f958f27e20
Use UTF-8 to encode the name of column in Python 2, or it may failed to encode with default encoding ('ascii'). This PR also fix a bug when there is Java exception without error message. Author: Davies Liu <davies@databricks.com> Closes #7165 from davies/non_ascii and squashes the following commits: 02cb61a [Davies Liu] fix tests 3b09d31 [Davies Liu] add encoding in header 867754a [Davies Liu] support non-ascii character in column names
55 lines
2 KiB
Python
55 lines
2 KiB
Python
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
import py4j
|
|
|
|
|
|
class AnalysisException(Exception):
|
|
"""
|
|
Failed to analyze a SQL query plan.
|
|
"""
|
|
|
|
|
|
def capture_sql_exception(f):
|
|
def deco(*a, **kw):
|
|
try:
|
|
return f(*a, **kw)
|
|
except py4j.protocol.Py4JJavaError as e:
|
|
s = e.java_exception.toString()
|
|
if s.startswith('org.apache.spark.sql.AnalysisException: '):
|
|
raise AnalysisException(s.split(': ', 1)[1])
|
|
raise
|
|
return deco
|
|
|
|
|
|
def install_exception_handler():
|
|
"""
|
|
Hook an exception handler into Py4j, which could capture some SQL exceptions in Java.
|
|
|
|
When calling Java API, it will call `get_return_value` to parse the returned object.
|
|
If any exception happened in JVM, the result will be Java exception object, it raise
|
|
py4j.protocol.Py4JJavaError. We replace the original `get_return_value` with one that
|
|
could capture the Java exception and throw a Python one (with the same error message).
|
|
|
|
It's idempotent, could be called multiple times.
|
|
"""
|
|
original = py4j.protocol.get_return_value
|
|
# The original `get_return_value` is not patched, it's idempotent.
|
|
patched = capture_sql_exception(original)
|
|
# only patch the one used in in py4j.java_gateway (call Java API)
|
|
py4j.java_gateway.get_return_value = patched
|