[SPARK-24542][SQL] UDF series UDFXPathXXXX allow users to pass carefully crafted XML to access arbitrary files
## What changes were proposed in this pull request? UDF series UDFXPathXXXX allow users to pass carefully crafted XML to access arbitrary files. Spark does not have built-in access control. When users use the external access control library, users might bypass them and access the file contents. This PR basically patches the Hive fix to Apache Spark. https://issues.apache.org/jira/browse/HIVE-18879 ## How was this patch tested? A unit test case Author: Xiao Li <gatorsmile@gmail.com> Closes #21549 from gatorsmile/xpathSecurity.
This commit is contained in:
parent
1737d45e08
commit
9a75c18290
|
@ -21,6 +21,9 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpression;
|
||||
|
@ -37,9 +40,15 @@ import org.xml.sax.InputSource;
|
|||
* This is based on Hive's UDFXPathUtil implementation.
|
||||
*/
|
||||
public class UDFXPathUtil {
|
||||
public static final String SAX_FEATURE_PREFIX = "http://xml.org/sax/features/";
|
||||
public static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = "external-general-entities";
|
||||
public static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = "external-parameter-entities";
|
||||
private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
||||
private DocumentBuilder builder = null;
|
||||
private XPath xpath = XPathFactory.newInstance().newXPath();
|
||||
private ReusableStringReader reader = new ReusableStringReader();
|
||||
private InputSource inputSource = new InputSource(reader);
|
||||
|
||||
private XPathExpression expression = null;
|
||||
private String oldPath = null;
|
||||
|
||||
|
@ -65,14 +74,31 @@ public class UDFXPathUtil {
|
|||
return null;
|
||||
}
|
||||
|
||||
if (builder == null){
|
||||
try {
|
||||
initializeDocumentBuilderFactory();
|
||||
builder = dbf.newDocumentBuilder();
|
||||
} catch (ParserConfigurationException e) {
|
||||
throw new RuntimeException(
|
||||
"Error instantiating DocumentBuilder, cannot build xml parser", e);
|
||||
}
|
||||
}
|
||||
|
||||
reader.set(xml);
|
||||
try {
|
||||
return expression.evaluate(inputSource, qname);
|
||||
return expression.evaluate(builder.parse(inputSource), qname);
|
||||
} catch (XPathExpressionException e) {
|
||||
throw new RuntimeException("Invalid XML document: " + e.getMessage() + "\n" + xml, e);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Error loading expression '" + oldPath + "'", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void initializeDocumentBuilderFactory() throws ParserConfigurationException {
|
||||
dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_GENERAL_ENTITIES_FEATURE, false);
|
||||
dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_PARAMETER_ENTITIES_FEATURE, false);
|
||||
}
|
||||
|
||||
public Boolean evalBoolean(String xml, String path) throws XPathExpressionException {
|
||||
return (Boolean) eval(xml, path, XPathConstants.BOOLEAN);
|
||||
}
|
||||
|
|
|
@ -77,6 +77,27 @@ class UDFXPathUtilSuite extends SparkFunSuite {
|
|||
assert(ret == "foo")
|
||||
}
|
||||
|
||||
test("embedFailure") {
|
||||
import org.apache.commons.io.FileUtils
|
||||
import java.io.File
|
||||
val secretValue = String.valueOf(Math.random)
|
||||
val tempFile = File.createTempFile("verifyembed", ".tmp")
|
||||
tempFile.deleteOnExit()
|
||||
val fname = tempFile.getAbsolutePath
|
||||
|
||||
FileUtils.writeStringToFile(tempFile, secretValue)
|
||||
|
||||
val xml =
|
||||
s"""<?xml version="1.0" encoding="utf-8"?>
|
||||
|<!DOCTYPE test [
|
||||
| <!ENTITY embed SYSTEM "$fname">
|
||||
|]>
|
||||
|<foo>&embed;</foo>
|
||||
""".stripMargin
|
||||
val evaled = new UDFXPathUtil().evalString(xml, "/foo")
|
||||
assert(evaled.isEmpty)
|
||||
}
|
||||
|
||||
test("number eval") {
|
||||
var ret =
|
||||
util.evalNumber("<a><b>true</b><b>false</b><b>b3</b><c>c1</c><c>-77</c></a>", "a/c[2]")
|
||||
|
|
|
@ -40,8 +40,9 @@ class XPathExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
|
|||
|
||||
// Test error message for invalid XML document
|
||||
val e1 = intercept[RuntimeException] { testExpr("<a>/a>", "a", null.asInstanceOf[T]) }
|
||||
assert(e1.getCause.getMessage.contains("Invalid XML document") &&
|
||||
e1.getCause.getMessage.contains("<a>/a>"))
|
||||
assert(e1.getCause.getCause.getMessage.contains(
|
||||
"XML document structures must start and end within the same entity."))
|
||||
assert(e1.getMessage.contains("<a>/a>"))
|
||||
|
||||
// Test error message for invalid xpath
|
||||
val e2 = intercept[RuntimeException] { testExpr("<a></a>", "!#$", null.asInstanceOf[T]) }
|
||||
|
|
Loading…
Reference in a new issue