[SPARK-35992][BUILD] Upgrade ORC to 1.6.9
### What changes were proposed in this pull request? This PR aims to upgrade Apache ORC to 1.6.9. ### Why are the changes needed? This is required to bring in ORC-804, which fixes an ORC encryption masking bug. ### Does this PR introduce _any_ user-facing change? No. This is not released yet. ### How was this patch tested? Pass the newly added test case. Closes #33189 from dongjoon-hyun/SPARK-35992. Lead-authored-by: Dongjoon Hyun <dongjoon@apache.org> Co-authored-by: Dongjoon Hyun <dhyun@apple.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
77696448db
commit
c55b9fd1e0
|
@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar
|
|||
RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
|
||||
ST4/4.0.4//ST4-4.0.4.jar
|
||||
activation/1.1.1//activation-1.1.1.jar
|
||||
aircompressor/0.16//aircompressor-0.16.jar
|
||||
aircompressor/0.19//aircompressor-0.19.jar
|
||||
algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
|
||||
annotations/17.0.0//annotations-17.0.0.jar
|
||||
antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
|
||||
|
@ -196,9 +196,9 @@ objenesis/2.6//objenesis-2.6.jar
|
|||
okhttp/3.12.12//okhttp-3.12.12.jar
|
||||
okio/1.14.0//okio-1.14.0.jar
|
||||
opencsv/2.3//opencsv-2.3.jar
|
||||
orc-core/1.6.8//orc-core-1.6.8.jar
|
||||
orc-mapreduce/1.6.8//orc-mapreduce-1.6.8.jar
|
||||
orc-shims/1.6.8//orc-shims-1.6.8.jar
|
||||
orc-core/1.6.9//orc-core-1.6.9.jar
|
||||
orc-mapreduce/1.6.9//orc-mapreduce-1.6.9.jar
|
||||
orc-shims/1.6.9//orc-shims-1.6.9.jar
|
||||
oro/2.0.8//oro-2.0.8.jar
|
||||
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
|
||||
paranamer/2.8//paranamer-2.8.jar
|
||||
|
|
|
@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar
|
|||
RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
|
||||
ST4/4.0.4//ST4-4.0.4.jar
|
||||
activation/1.1.1//activation-1.1.1.jar
|
||||
aircompressor/0.16//aircompressor-0.16.jar
|
||||
aircompressor/0.19//aircompressor-0.19.jar
|
||||
algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
|
||||
annotations/17.0.0//annotations-17.0.0.jar
|
||||
antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
|
||||
|
@ -167,9 +167,9 @@ objenesis/2.6//objenesis-2.6.jar
|
|||
okhttp/3.12.12//okhttp-3.12.12.jar
|
||||
okio/1.14.0//okio-1.14.0.jar
|
||||
opencsv/2.3//opencsv-2.3.jar
|
||||
orc-core/1.6.8//orc-core-1.6.8.jar
|
||||
orc-mapreduce/1.6.8//orc-mapreduce-1.6.8.jar
|
||||
orc-shims/1.6.8//orc-shims-1.6.8.jar
|
||||
orc-core/1.6.9//orc-core-1.6.9.jar
|
||||
orc-mapreduce/1.6.9//orc-mapreduce-1.6.9.jar
|
||||
orc-shims/1.6.9//orc-shims-1.6.9.jar
|
||||
oro/2.0.8//oro-2.0.8.jar
|
||||
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
|
||||
paranamer/2.8//paranamer-2.8.jar
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -137,7 +137,7 @@
|
|||
<!-- After 10.15.1.3, the minimum required version is JDK9 -->
|
||||
<derby.version>10.14.2.0</derby.version>
|
||||
<parquet.version>1.12.0</parquet.version>
|
||||
<orc.version>1.6.8</orc.version>
|
||||
<orc.version>1.6.9</orc.version>
|
||||
<jetty.version>9.4.42.v20210604</jetty.version>
|
||||
<jakartaservlet.version>4.0.3</jakartaservlet.version>
|
||||
<chill.version>0.10.0</chill.version>
|
||||
|
|
|
@ -159,4 +159,70 @@ class OrcEncryptionSuite extends OrcTest with SharedSparkSession {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Regression test for SPARK-35992. Two scenarios are exercised:
//   1. A flat DataFrame (ssn, email, name) is written with `orc.encrypt` covering all
//      three columns, read back intact with the `hadoop` key provider, and read back
//      as all-null with the `memory` key provider.
//   2. A table with a nested struct column is written with `orc.encrypt` covering
//      `id` and `contact`, then re-read through a second table over the same location
//      using the `memory` key provider, where every value is expected to be null.
// NOTE(review): the null results are presumably ORC's default (nullify) mask applied
// when the reader has no access to the key -- confirm against the ORC documentation.
test("SPARK-35992: Write and read fully-encrypted columns with default masking") {
|
||||
val conf = spark.sessionState.newHadoopConf()
|
||||
val provider = HadoopShimsFactory.get.getHadoopKeyProvider(conf, new Random)
|
||||
// Precondition, not an assertion: the test only makes sense when the key provider
// exposes at least one key name. In ScalaTest a failed `assume` cancels the test
// instead of failing it.
// NOTE(review): the message below reads "doesn't has"; it is a runtime string and is
// left untouched here.
assume(!provider.getKeyNames.isEmpty,
|
||||
s"$provider doesn't has the test keys. ORC shim is created with old Hadoop libraries")
|
||||
|
||||
val df = originalData.toDF("ssn", "email", "name")
|
||||
|
||||
withTempPath { dir =>
|
||||
val path = dir.getAbsolutePath
|
||||
// Scenario 1a: write with the `hadoop` key provider and read back under the same
// configuration; the round trip must return the original rows.
// NOTE(review): "pii:ssn,email,name" presumably means "encrypt these columns with
// the key named pii" -- confirm against ORC's `orc.encrypt` syntax.
withSQLConf(
|
||||
"hadoop.security.key.provider.path" -> "test:///",
|
||||
"orc.key.provider" -> "hadoop",
|
||||
"orc.encrypt" -> "pii:ssn,email,name") {
|
||||
df.write.mode("overwrite").orc(path)
|
||||
checkAnswer(spark.read.orc(path), df)
|
||||
}
|
||||
|
||||
// Scenario 1b: re-read the same files with the `memory` key provider; all three
// columns are expected to come back as null.
withSQLConf(
|
||||
"orc.key.provider" -> "memory",
|
||||
"orc.encrypt" -> "pii:ssn,email,name") {
|
||||
checkAnswer(spark.read.orc(path), Row(null, null, null))
|
||||
}
|
||||
}
|
||||
|
||||
// Expected row for scenario 2: an int id plus a nested (ssn, email, name) struct.
val originalNestedData = Row(1, Row("123456789", "dongjoon@apache.org", "Dongjoon"))
|
||||
|
||||
withTempDir { dir =>
|
||||
val path = dir.getAbsolutePath
|
||||
// Scenario 2a: a table at `path` whose OPTIONS request encryption of `id` and the
// whole `contact` struct; the inserted row must read back unchanged.
withTable("encrypted") {
|
||||
sql(
|
||||
s"""
|
||||
|CREATE TABLE encrypted (
|
||||
| id INT,
|
||||
| contact struct<ssn:STRING, email:STRING, name:STRING>
|
||||
|)
|
||||
|USING ORC
|
||||
|LOCATION "$path"
|
||||
|OPTIONS (
|
||||
| hadoop.security.key.provider.path "test:///",
|
||||
| orc.key.provider "hadoop",
|
||||
| orc.encrypt "pii:id,contact"
|
||||
|)
|
||||
|""".stripMargin)
|
||||
sql("INSERT INTO encrypted VALUES(1, ('123456789', 'dongjoon@apache.org', 'Dongjoon'))")
|
||||
checkAnswer(sql("SELECT * FROM encrypted"), originalNestedData)
|
||||
}
|
||||
// Scenario 2b: a second table over the SAME location, configured only with the
// `memory` key provider. Both the top-level columns and the expanded struct fields
// are expected to be null.
withTable("normal") {
|
||||
sql(
|
||||
s"""
|
||||
|CREATE TABLE normal (
|
||||
| id INT,
|
||||
| contact struct<ssn:STRING, email:STRING, name:STRING>
|
||||
|)
|
||||
|USING ORC
|
||||
|LOCATION "$path"
|
||||
|OPTIONS (
|
||||
| orc.key.provider "memory"
|
||||
|)
|
||||
|""".stripMargin)
|
||||
// The struct itself is null (not a struct of nulls), hence Row(null, null).
checkAnswer(sql("SELECT * FROM normal"), Row(null, null))
|
||||
checkAnswer(sql("SELECT id, contact.* FROM normal"), Row(null, null, null, null))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue