[SPARK-27054][BUILD][SQL] Remove the Calcite dependency

## What changes were proposed in this pull request?

Calcite is only used for [runSqlHive](02bbe977ab/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala (L699-L705)) when `hive.cbo.enable=true`([SemanticAnalyzer](https://github.com/apache/hive/blob/release-1.2.1/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java#L278-L280)).
So we can disable `hive.cbo.enable` and remove the Calcite dependency.

## How was this patch tested?

Existing tests

Closes #23970 from wangyum/SPARK-27054.

Lead-authored-by: Yuming Wang <yumwang@ebay.com>
Co-authored-by: Yuming Wang <wgyumg@gmail.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Yuming Wang 2019-03-09 16:34:24 -08:00 committed by Dongjoon Hyun
parent 6e1c0827ec
commit f732647ae4
8 changed files with 37 additions and 106 deletions

View file

@ -260,9 +260,6 @@ net.sf.supercsv:super-csv
org.apache.arrow:arrow-format
org.apache.arrow:arrow-memory
org.apache.arrow:arrow-vector
org.apache.calcite:calcite-avatica
org.apache.calcite:calcite-core
org.apache.calcite:calcite-linq4j
org.apache.commons:commons-crypto
org.apache.commons:commons-lang3
org.apache.hadoop:hadoop-annotations

View file

@ -792,15 +792,6 @@ Copyright 2005-2006 The Apache Software Foundation
Apache Jakarta HttpClient
Copyright 1999-2007 The Apache Software Foundation
Calcite Avatica
Copyright 2012-2015 The Apache Software Foundation
Calcite Core
Copyright 2012-2015 The Apache Software Foundation
Calcite Linq4j
Copyright 2012-2015 The Apache Software Foundation
Apache HttpClient
Copyright 1999-2017 The Apache Software Foundation

View file

@ -24,9 +24,6 @@ avro-mapred-1.8.2-hadoop2.jar
bonecp-0.8.0.RELEASE.jar
breeze-macros_2.12-0.13.2.jar
breeze_2.12-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
chill-java-0.9.3.jar
chill_2.12-0.9.3.jar
commons-beanutils-1.7.0.jar
@ -57,7 +54,6 @@ datanucleus-api-jdo-3.2.6.jar
datanucleus-core-3.2.10.jar
datanucleus-rdbms-3.2.9.jar
derby-10.12.1.1.jar
eigenbase-properties-1.1.5.jar
flatbuffers-java-1.9.0.jar
generex-1.0.1.jar
gson-2.2.4.jar

View file

@ -22,9 +22,6 @@ avro-mapred-1.8.2-hadoop2.jar
bonecp-0.8.0.RELEASE.jar
breeze-macros_2.12-0.13.2.jar
breeze_2.12-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
chill-java-0.9.3.jar
chill_2.12-0.9.3.jar
commons-beanutils-1.9.3.jar
@ -56,7 +53,6 @@ datanucleus-rdbms-3.2.9.jar
derby-10.12.1.1.jar
dnsjava-2.1.7.jar
ehcache-3.3.1.jar
eigenbase-properties-1.1.5.jar
flatbuffers-java-1.9.0.jar
generex-1.0.1.jar
geronimo-jcache_1.0_spec-1.0-alpha-1.jar

72
pom.xml
View file

@ -168,7 +168,6 @@
<fasterxml.jackson.version>2.9.8</fasterxml.jackson.version>
<snappy.version>1.1.7.1</snappy.version>
<netlib.java.version>1.1.2</netlib.java.version>
<calcite.version>1.2.0-incubating</calcite.version>
<commons-codec.version>1.10</commons-codec.version>
<commons-io.version>2.4</commons-io.version>
<!-- org.apache.commons/commons-lang/-->
@ -1467,11 +1466,15 @@
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
</exclusion>
<!-- this is needed and must be explicitly included later-->
<!-- Do not need Calcite because we disabled hive.cbo.enable -->
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.curator</groupId>
<artifactId>apache-curator</artifactId>
@ -1841,71 +1844,6 @@
<version>${hive.parquet.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<version>${calcite.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
<!-- hsqldb interferes with the use of derby as the default db
in hive's use of datanucleus.
-->
<exclusion>
<groupId>org.hsqldb</groupId>
<artifactId>hsqldb</artifactId>
</exclusion>
<exclusion>
<groupId>org.pentaho</groupId>
<artifactId>pentaho-aggdesigner-algorithm</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
<version>${calcite.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>

View file

@ -129,14 +129,6 @@
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>

View file

@ -178,6 +178,8 @@ private[hive] class HiveClientImpl(
""".stripMargin)
hiveConf.set(k, v)
}
// Disable CBO because we removed the Calcite dependency.
hiveConf.setBoolean("hive.cbo.enable", false)
val state = new SessionState(hiveConf)
if (clientLoader.cachedHive != null) {
Hive.set(clientLoader.cachedHive.asInstanceOf[Hive])

View file

@ -29,19 +29,20 @@ package object client {
case object v12 extends HiveVersion("0.12.0")
case object v13 extends HiveVersion("0.13.1")
// Hive 0.14 depends on calcite 0.9.2-incubating-SNAPSHOT which does not exist in
// maven central anymore, so override those with a version that exists.
// Do not need Calcite because we disabled hive.cbo.enable.
//
// The other excluded dependencies are also nowhere to be found, so exclude them explicitly. If
// The other excluded dependencies are nowhere to be found, so exclude them explicitly. If
// they're needed by the metastore client, users will have to dig them out of somewhere and use
// configuration to point Spark at the correct jars.
case object v14 extends HiveVersion("0.14.0",
extraDeps = Seq("org.apache.calcite:calcite-core:1.3.0-incubating",
"org.apache.calcite:calcite-avatica:1.3.0-incubating"),
exclusions = Seq("org.pentaho:pentaho-aggdesigner-algorithm"))
exclusions = Seq("org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-avatica",
"org.pentaho:pentaho-aggdesigner-algorithm"))
case object v1_0 extends HiveVersion("1.0.0",
exclusions = Seq("eigenbase:eigenbase-properties",
"org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-avatica",
"org.pentaho:pentaho-aggdesigner-algorithm",
"net.hydromatic:linq4j",
"net.hydromatic:quidem"))
@ -51,6 +52,8 @@ package object client {
// and fails.
case object v1_1 extends HiveVersion("1.1.0",
exclusions = Seq("eigenbase:eigenbase-properties",
"org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm",
"net.hydromatic:linq4j",
@ -58,32 +61,48 @@ package object client {
case object v1_2 extends HiveVersion("1.2.2",
exclusions = Seq("eigenbase:eigenbase-properties",
"org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm",
"net.hydromatic:linq4j",
"net.hydromatic:quidem"))
case object v2_0 extends HiveVersion("2.0.1",
exclusions = Seq("org.apache.curator:*",
exclusions = Seq("org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))
case object v2_1 extends HiveVersion("2.1.1",
exclusions = Seq("org.apache.curator:*",
exclusions = Seq("org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))
case object v2_2 extends HiveVersion("2.2.0",
exclusions = Seq("org.apache.curator:*",
exclusions = Seq("org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-druid",
"org.apache.calcite.avatica:avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))
// Since HIVE-14496, Hive materialized views need calcite-core.
// For Spark, only VersionsSuite currently creates a Hive materialized view for testing.
case object v2_3 extends HiveVersion("2.3.4",
exclusions = Seq("org.apache.curator:*",
exclusions = Seq("org.apache.calcite:calcite-druid",
"org.apache.calcite.avatica:avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))
// Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings
// Since HIVE-14496, Hive.java uses calcite-core
case object v3_1 extends HiveVersion("3.1.1",
extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0",
"org.apache.derby:derby:10.14.1.0"),
exclusions = Seq("org.apache.curator:*",
exclusions = Seq("org.apache.calcite:calcite-druid",
"org.apache.calcite.avatica:avatica",
"org.apache.curator:*",
"org.pentaho:pentaho-aggdesigner-algorithm"))
val allSupportedHiveVersions = Set(v12, v13, v14, v1_0, v1_1, v1_2, v2_0, v2_1, v2_2, v2_3, v3_1)