[SPARK-35992][BUILD] Upgrade ORC to 1.6.9

### What changes were proposed in this pull request?

This PR aims to upgrade Apache ORC to 1.6.9.

### Why are the changes needed?

This upgrade is required to bring in ORC-804, which fixes an ORC encryption masking bug.
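
For context, ORC column encryption masks encrypted columns for readers that cannot access the corresponding keys, and the default mask returns NULLs; ORC-804 fixes a bug in that default masking path. The sketch below is a minimal illustration of the behavior the new test verifies, assuming a `SparkSession` named `spark` and a key provider registered for the `test:///` scheme that serves a key named `pii` (as on this suite's test classpath); it is not a production setup:

```scala
// Minimal sketch mirroring the new OrcEncryptionSuite test below.
// Assumes a local SparkSession `spark` and a test-only key provider for the
// "test:///" scheme that already holds a key named "pii".
import spark.implicits._

val df = Seq(("123456789", "dongjoon@apache.org", "Dongjoon"))
  .toDF("ssn", "email", "name")

// Write with all three columns encrypted under the "pii" key.
spark.conf.set("hadoop.security.key.provider.path", "test:///")
spark.conf.set("orc.key.provider", "hadoop")
spark.conf.set("orc.encrypt", "pii:ssn,email,name")
df.write.mode("overwrite").orc("/tmp/orc-encrypted")

// Read back without access to the key: default masking yields NULLs.
spark.conf.set("orc.key.provider", "memory")
spark.read.orc("/tmp/orc-encrypted").show()
// Expected: a single row of (null, null, null)
```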

### Does this PR introduce _any_ user-facing change?

No. The affected ORC encryption support has not been released yet.

### How was this patch tested?

Pass the newly added test case.

Closes #33189 from dongjoon-hyun/SPARK-35992.

Lead-authored-by: Dongjoon Hyun <dongjoon@apache.org>
Co-authored-by: Dongjoon Hyun <dhyun@apple.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
Author: Dongjoon Hyun
Date:   2021-07-02 09:49:49 -07:00
Commit: c55b9fd1e0 (parent 77696448db)
4 changed files with 75 additions and 9 deletions

@@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar
 RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
 ST4/4.0.4//ST4-4.0.4.jar
 activation/1.1.1//activation-1.1.1.jar
-aircompressor/0.16//aircompressor-0.16.jar
+aircompressor/0.19//aircompressor-0.19.jar
 algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
 annotations/17.0.0//annotations-17.0.0.jar
 antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
@@ -196,9 +196,9 @@ objenesis/2.6//objenesis-2.6.jar
 okhttp/3.12.12//okhttp-3.12.12.jar
 okio/1.14.0//okio-1.14.0.jar
 opencsv/2.3//opencsv-2.3.jar
-orc-core/1.6.8//orc-core-1.6.8.jar
-orc-mapreduce/1.6.8//orc-mapreduce-1.6.8.jar
-orc-shims/1.6.8//orc-shims-1.6.8.jar
+orc-core/1.6.9//orc-core-1.6.9.jar
+orc-mapreduce/1.6.9//orc-mapreduce-1.6.9.jar
+orc-shims/1.6.9//orc-shims-1.6.9.jar
 oro/2.0.8//oro-2.0.8.jar
 osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
 paranamer/2.8//paranamer-2.8.jar

@@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar
 RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
 ST4/4.0.4//ST4-4.0.4.jar
 activation/1.1.1//activation-1.1.1.jar
-aircompressor/0.16//aircompressor-0.16.jar
+aircompressor/0.19//aircompressor-0.19.jar
 algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
 annotations/17.0.0//annotations-17.0.0.jar
 antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
@@ -167,9 +167,9 @@ objenesis/2.6//objenesis-2.6.jar
 okhttp/3.12.12//okhttp-3.12.12.jar
 okio/1.14.0//okio-1.14.0.jar
 opencsv/2.3//opencsv-2.3.jar
-orc-core/1.6.8//orc-core-1.6.8.jar
-orc-mapreduce/1.6.8//orc-mapreduce-1.6.8.jar
-orc-shims/1.6.8//orc-shims-1.6.8.jar
+orc-core/1.6.9//orc-core-1.6.9.jar
+orc-mapreduce/1.6.9//orc-mapreduce-1.6.9.jar
+orc-shims/1.6.9//orc-shims-1.6.9.jar
 oro/2.0.8//oro-2.0.8.jar
 osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
 paranamer/2.8//paranamer-2.8.jar

@@ -137,7 +137,7 @@
     <!-- After 10.15.1.3, the minimum required version is JDK9 -->
     <derby.version>10.14.2.0</derby.version>
     <parquet.version>1.12.0</parquet.version>
-    <orc.version>1.6.8</orc.version>
+    <orc.version>1.6.9</orc.version>
     <jetty.version>9.4.42.v20210604</jetty.version>
     <jakartaservlet.version>4.0.3</jakartaservlet.version>
     <chill.version>0.10.0</chill.version>

@@ -159,4 +159,70 @@ class OrcEncryptionSuite extends OrcTest with SharedSparkSession {
       }
     }
   }
+
+  test("SPARK-35992: Write and read fully-encrypted columns with default masking") {
+    val conf = spark.sessionState.newHadoopConf()
+    val provider = HadoopShimsFactory.get.getHadoopKeyProvider(conf, new Random)
+    assume(!provider.getKeyNames.isEmpty,
+      s"$provider doesn't have the test keys. ORC shim is created with old Hadoop libraries")
+    val df = originalData.toDF("ssn", "email", "name")
+
+    withTempPath { dir =>
+      val path = dir.getAbsolutePath
+      withSQLConf(
+        "hadoop.security.key.provider.path" -> "test:///",
+        "orc.key.provider" -> "hadoop",
+        "orc.encrypt" -> "pii:ssn,email,name") {
+        df.write.mode("overwrite").orc(path)
+        checkAnswer(spark.read.orc(path), df)
+      }
+
+      withSQLConf(
+        "orc.key.provider" -> "memory",
+        "orc.encrypt" -> "pii:ssn,email,name") {
+        checkAnswer(spark.read.orc(path), Row(null, null, null))
+      }
+    }
+
+    val originalNestedData = Row(1, Row("123456789", "dongjoon@apache.org", "Dongjoon"))
+
+    withTempDir { dir =>
+      val path = dir.getAbsolutePath
+      withTable("encrypted") {
+        sql(
+          s"""
+            |CREATE TABLE encrypted (
+            |  id INT,
+            |  contact struct<ssn:STRING, email:STRING, name:STRING>
+            |)
+            |USING ORC
+            |LOCATION "$path"
+            |OPTIONS (
+            |  hadoop.security.key.provider.path "test:///",
+            |  orc.key.provider "hadoop",
+            |  orc.encrypt "pii:id,contact"
+            |)
+            |""".stripMargin)
+        sql("INSERT INTO encrypted VALUES(1, ('123456789', 'dongjoon@apache.org', 'Dongjoon'))")
+        checkAnswer(sql("SELECT * FROM encrypted"), originalNestedData)
+      }
+
+      withTable("normal") {
+        sql(
+          s"""
+            |CREATE TABLE normal (
+            |  id INT,
+            |  contact struct<ssn:STRING, email:STRING, name:STRING>
+            |)
+            |USING ORC
+            |LOCATION "$path"
+            |OPTIONS (
+            |  orc.key.provider "memory"
+            |)
+            |""".stripMargin)
+        checkAnswer(sql("SELECT * FROM normal"), Row(null, null))
+        checkAnswer(sql("SELECT id, contact.* FROM normal"), Row(null, null, null, null))
+      }
+    }
+  }
 }
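
As a usage note, the configuration keys exercised above (`orc.encrypt`, `orc.key.provider`, `hadoop.security.key.provider.path`) are ordinary Hadoop/ORC configuration entries: `orc.encrypt` takes a `keyName:columnList` spec, and the `CREATE TABLE ... OPTIONS (...)` form works because Spark copies data source options into the Hadoop configuration used for the write or scan. Under that same assumption, a per-source variant of the first scenario might look like the following sketch (again relying on the test-only `test:///` provider and `pii` key, which are not a production KMS):

```scala
// Hedged sketch: per-source options instead of session-wide configuration.
// Assumes data source options are propagated into the Hadoop configuration,
// the same mechanism the CREATE TABLE ... OPTIONS clause above relies on,
// and that a "test:///" key provider holding a "pii" key is on the classpath.
import spark.implicits._

val df = Seq(("123456789", "dongjoon@apache.org", "Dongjoon"))
  .toDF("ssn", "email", "name")

df.write
  .mode("overwrite")
  .option("hadoop.security.key.provider.path", "test:///")
  .option("orc.key.provider", "hadoop")
  .option("orc.encrypt", "pii:ssn,email,name") // key name : encrypted columns
  .orc("/tmp/orc-encrypted")

// Without access to the "pii" key, the encrypted columns are read back as NULLs.
spark.read
  .option("orc.key.provider", "memory")
  .orc("/tmp/orc-encrypted")
  .show()
```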