[SPARK-31580][BUILD] Upgrade Apache ORC to 1.5.10
### What changes were proposed in this pull request? This PR aims to upgrade Apache ORC to 1.5.10. ### Why are the changes needed? Apache ORC 1.5.10 is a maintenance release with the following patches. - [ORC-621](https://issues.apache.org/jira/browse/ORC-621) Need reader fix for ORC-569 - [ORC-616](https://issues.apache.org/jira/browse/ORC-616) In Patched Base encoding, the value of headerThirdByte goes beyond the range of byte - [ORC-613](https://issues.apache.org/jira/browse/ORC-613) OrcMapredRecordReader mis-reuse struct object when actual children schema differs - [ORC-610](https://issues.apache.org/jira/browse/ORC-610) Updated Copyright year in the NOTICE file The following is the release note. - https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12318320&version=12346912 ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Pass Jenkins with the existing ORC tests and a newly added test case. - The first commit was already tested in the `hive-2.3` profile with both the native ORC implementation and the Hive 2.3 ORC implementation. (https://github.com/apache/spark/pull/28373#issuecomment-620265114) - The latest run is to disable the test case in the `hive-1.2` profile, which doesn't use Apache ORC. - `hive-1.2`: https://github.com/apache/spark/pull/28373#issuecomment-620325906 Closes #28373 from dongjoon-hyun/SPARK-ORC-1.5.10. Authored-by: Dongjoon Hyun <dongjoon@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
2f4f38b6f1
commit
79eaaaf6da
|
@ -160,9 +160,9 @@ objenesis/2.5.1//objenesis-2.5.1.jar
|
|||
okhttp/3.12.6//okhttp-3.12.6.jar
|
||||
okio/1.15.0//okio-1.15.0.jar
|
||||
opencsv/2.3//opencsv-2.3.jar
|
||||
orc-core/1.5.9/nohive/orc-core-1.5.9-nohive.jar
|
||||
orc-mapreduce/1.5.9/nohive/orc-mapreduce-1.5.9-nohive.jar
|
||||
orc-shims/1.5.9//orc-shims-1.5.9.jar
|
||||
orc-core/1.5.10/nohive/orc-core-1.5.10-nohive.jar
|
||||
orc-mapreduce/1.5.10/nohive/orc-mapreduce-1.5.10-nohive.jar
|
||||
orc-shims/1.5.10//orc-shims-1.5.10.jar
|
||||
oro/2.0.8//oro-2.0.8.jar
|
||||
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
|
||||
paranamer/2.8//paranamer-2.8.jar
|
||||
|
|
|
@ -175,9 +175,9 @@ objenesis/2.5.1//objenesis-2.5.1.jar
|
|||
okhttp/3.12.6//okhttp-3.12.6.jar
|
||||
okio/1.15.0//okio-1.15.0.jar
|
||||
opencsv/2.3//opencsv-2.3.jar
|
||||
orc-core/1.5.9//orc-core-1.5.9.jar
|
||||
orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar
|
||||
orc-shims/1.5.9//orc-shims-1.5.9.jar
|
||||
orc-core/1.5.10//orc-core-1.5.10.jar
|
||||
orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar
|
||||
orc-shims/1.5.10//orc-shims-1.5.10.jar
|
||||
oro/2.0.8//oro-2.0.8.jar
|
||||
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
|
||||
paranamer/2.8//paranamer-2.8.jar
|
||||
|
|
|
@ -190,9 +190,9 @@ okhttp/2.7.5//okhttp-2.7.5.jar
|
|||
okhttp/3.12.6//okhttp-3.12.6.jar
|
||||
okio/1.15.0//okio-1.15.0.jar
|
||||
opencsv/2.3//opencsv-2.3.jar
|
||||
orc-core/1.5.9//orc-core-1.5.9.jar
|
||||
orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar
|
||||
orc-shims/1.5.9//orc-shims-1.5.9.jar
|
||||
orc-core/1.5.10//orc-core-1.5.10.jar
|
||||
orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar
|
||||
orc-shims/1.5.10//orc-shims-1.5.10.jar
|
||||
oro/2.0.8//oro-2.0.8.jar
|
||||
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
|
||||
paranamer/2.8//paranamer-2.8.jar
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -135,7 +135,7 @@
|
|||
<kafka.version>2.5.0</kafka.version>
|
||||
<derby.version>10.12.1.1</derby.version>
|
||||
<parquet.version>1.10.1</parquet.version>
|
||||
<orc.version>1.5.9</orc.version>
|
||||
<orc.version>1.5.10</orc.version>
|
||||
<orc.classifier></orc.classifier>
|
||||
<hive.parquet.group>com.twitter</hive.parquet.group>
|
||||
<hive.parquet.version>1.6.0</hive.parquet.version>
|
||||
|
|
Binary file not shown.
|
@ -589,4 +589,10 @@ class OrcSourceSuite extends OrcSuite with SharedSparkSession {
|
|||
test("SPARK-11412 read and merge orc schemas in parallel") {
|
||||
testMergeSchemasInParallel(OrcUtils.readOrcSchemasInParallel)
|
||||
}
|
||||
|
||||
test("SPARK-31580: Read a file written before ORC-569") {
|
||||
// Test ORC file came from ORC-621
|
||||
val df = readResourceOrcFile("test-data/TestStringDictionary.testRowIndex.orc")
|
||||
assert(df.where("str < 'row 001000'").count() === 1000)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -320,4 +320,11 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-31580: Read a file written before ORC-569") {
|
||||
assume(HiveUtils.isHive23) // Hive 1.2 doesn't use Apache ORC
|
||||
// Test ORC file came from ORC-621
|
||||
val df = readResourceOrcFile("test-data/TestStringDictionary.testRowIndex.orc")
|
||||
assert(df.where("str < 'row 001000'").count() === 1000)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue