[SPARK-19926][PYSPARK] make captured exception from JVM side user friendly
### What changes were proposed in this pull request? The str of `CapturedException` is now returned by str(self.desc) rather than repr(self.desc), which is more user-friendly. It also handles unicode under python2 specially. ### Why are the changes needed? This is an improvement that makes exceptions more human-readable on the Python side. ### Does this PR introduce any user-facing change? Before this PR, selecting `中文字段` throws an exception like the one below: ``` Traceback (most recent call last): File "/Users/advancedxy/code_workspace/github/spark/python/pyspark/sql/tests/test_utils.py", line 34, in test_capture_user_friendly_exception raise e AnalysisException: u"cannot resolve '`\u4e2d\u6587\u5b57\u6bb5`' given input columns: []; line 1 pos 7;\n'Project ['\u4e2d\u6587\u5b57\u6bb5]\n+- OneRowRelation\n" ``` After this PR: ``` Traceback (most recent call last): File "/Users/advancedxy/code_workspace/github/spark/python/pyspark/sql/tests/test_utils.py", line 34, in test_capture_user_friendly_exception raise e AnalysisException: cannot resolve '`中文字段`' given input columns: []; line 1 pos 7; 'Project ['中文字段] +- OneRowRelation ``` ### How was this patch tested? Added a new test to verify that unicode is correctly converted, plus manual checks of the thrown exceptions. Credit for this PR should go to uncleGen; it is based on https://github.com/apache/spark/pull/17267 Closes #25814 from advancedxy/python_exception_19926_and_21045. Authored-by: Xianjin YE <advancedxy@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
c2734ab1fc
commit
203bf9e569
|
@ -1,3 +1,4 @@
|
|||
# -*- encoding: utf-8 -*-
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -26,6 +27,12 @@ class UtilsTests(ReusedSQLTestCase):
|
|||
self.assertRaises(AnalysisException, lambda: self.spark.sql("select abc"))
|
||||
self.assertRaises(AnalysisException, lambda: self.df.selectExpr("a + b"))
|
||||
|
||||
def test_capture_user_friendly_exception(self):
|
||||
try:
|
||||
self.spark.sql("select `中文字段`")
|
||||
except AnalysisException as e:
|
||||
self.assertRegexpMatches(str(e), "cannot resolve '`中文字段`'")
|
||||
|
||||
def test_capture_parse_exception(self):
|
||||
self.assertRaises(ParseException, lambda: self.spark.sql("abc"))
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#
|
||||
|
||||
import py4j
|
||||
import sys
|
||||
|
||||
|
||||
class CapturedException(Exception):
|
||||
|
@ -25,7 +26,12 @@ class CapturedException(Exception):
|
|||
self.cause = convert_exception(cause) if cause is not None else None
|
||||
|
||||
def __str__(self):
|
||||
return repr(self.desc)
|
||||
desc = self.desc
|
||||
# encode unicode instance for python2 for human readable description
|
||||
if sys.version_info.major < 3 and isinstance(desc, unicode):
|
||||
return str(desc.encode('utf-8'))
|
||||
else:
|
||||
return str(desc)
|
||||
|
||||
|
||||
class AnalysisException(CapturedException):
|
||||
|
|
Loading…
Reference in a new issue