diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 0d8e0323bf..666fbefbac 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar -aircompressor/0.16//aircompressor-0.16.jar +aircompressor/0.19//aircompressor-0.19.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar @@ -196,9 +196,9 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.6.8//orc-core-1.6.8.jar -orc-mapreduce/1.6.8//orc-mapreduce-1.6.8.jar -orc-shims/1.6.8//orc-shims-1.6.8.jar +orc-core/1.6.9//orc-core-1.6.9.jar +orc-mapreduce/1.6.9//orc-mapreduce-1.6.9.jar +orc-shims/1.6.9//orc-shims-1.6.9.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b7d49384c3..22f522c98d 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar -aircompressor/0.16//aircompressor-0.16.jar +aircompressor/0.19//aircompressor-0.19.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar @@ -167,9 +167,9 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.6.8//orc-core-1.6.8.jar -orc-mapreduce/1.6.8//orc-mapreduce-1.6.8.jar -orc-shims/1.6.8//orc-shims-1.6.8.jar 
+orc-core/1.6.9//orc-core-1.6.9.jar +orc-mapreduce/1.6.9//orc-mapreduce-1.6.9.jar +orc-shims/1.6.9//orc-shims-1.6.9.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index 48307fd52b..c0ac02439a 100644 --- a/pom.xml +++ b/pom.xml @@ -137,7 +137,7 @@ 10.14.2.0 1.12.0 - 1.6.8 + 1.6.9 9.4.42.v20210604 4.0.3 0.10.0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala index ed7a11c923..b7d29588f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala @@ -159,4 +159,70 @@ class OrcEncryptionSuite extends OrcTest with SharedSparkSession { } } } + + test("SPARK-35992: Write and read fully-encrypted columns with default masking") { + val conf = spark.sessionState.newHadoopConf() + val provider = HadoopShimsFactory.get.getHadoopKeyProvider(conf, new Random) + assume(!provider.getKeyNames.isEmpty, + s"$provider doesn't has the test keys. 
ORC shim is created with old Hadoop libraries") + + val df = originalData.toDF("ssn", "email", "name") + + withTempPath { dir => + val path = dir.getAbsolutePath + withSQLConf( + "hadoop.security.key.provider.path" -> "test:///", + "orc.key.provider" -> "hadoop", + "orc.encrypt" -> "pii:ssn,email,name") { + df.write.mode("overwrite").orc(path) + checkAnswer(spark.read.orc(path), df) + } + + withSQLConf( + "orc.key.provider" -> "memory", + "orc.encrypt" -> "pii:ssn,email,name") { + checkAnswer(spark.read.orc(path), Row(null, null, null)) + } + } + + val originalNestedData = Row(1, Row("123456789", "dongjoon@apache.org", "Dongjoon")) + + withTempDir { dir => + val path = dir.getAbsolutePath + withTable("encrypted") { + sql( + s""" + |CREATE TABLE encrypted ( + | id INT, + | contact struct<ssn:STRING, email:STRING, name:STRING> + |) + |USING ORC + |LOCATION "$path" + |OPTIONS ( + | hadoop.security.key.provider.path "test:///", + | orc.key.provider "hadoop", + | orc.encrypt "pii:id,contact" + |) + |""".stripMargin) + sql("INSERT INTO encrypted VALUES(1, ('123456789', 'dongjoon@apache.org', 'Dongjoon'))") + checkAnswer(sql("SELECT * FROM encrypted"), originalNestedData) + } + withTable("normal") { + sql( + s""" + |CREATE TABLE normal ( + | id INT, + | contact struct<ssn:STRING, email:STRING, name:STRING> + |) + |USING ORC + |LOCATION "$path" + |OPTIONS ( + | orc.key.provider "memory" + |) + |""".stripMargin) + checkAnswer(sql("SELECT * FROM normal"), Row(null, null)) + checkAnswer(sql("SELECT id, contact.* FROM normal"), Row(null, null, null, null)) + } + } + } }