[SPARK-13579][BUILD] Stop building the main Spark assembly.

This change modifies the "assembly/" module to just copy needed
dependencies to its build directory, and modifies the packaging
script to pick those up (and to remove duplicate jars packaged in the
examples module).

I also made some minor dependency adjustments to remove some test jars
from the final packaging, and to drop jars that conflict with each other
when packaged separately (e.g. servlet-api).

Also note that this change restores Guava to applications' classpaths, even
though it is still shaded inside Spark. This is needed by the Hadoop
libraries that are packaged with Spark, which are no longer processed by
the shade plugin.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #11796 from vanzin/SPARK-13579.
Marcelo Vanzin 2016-04-04 16:52:21 -07:00 committed by Josh Rosen
parent 400b2f863f
commit 24d7d2e453
26 changed files with 231 additions and 319 deletions
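
For orientation, a rough sketch of the new layout follows (paths are taken from the changes below; the Scala version directory and the build profiles shown are illustrative, not required):

# Package Spark without building the uber assembly jar; the assembly module
# now just copies its runtime dependencies into a jars/ directory.
build/sbt -Pyarn -Phive assembly/package        # or: build/mvn -DskipTests package

# Development builds: the launcher scripts look for individual jars here
# instead of a single spark-assembly-*.jar.
ls assembly/target/scala-2.11/jars/

# Binary distributions (marked by a RELEASE file): jars now live under
# $SPARK_HOME/jars instead of $SPARK_HOME/lib.
ls "$SPARK_HOME"/jars/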


@ -33,9 +33,8 @@
<properties>
<sbt.project.name>assembly</sbt.project.name>
<spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
<spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
<spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
<build.testJarPhase>none</build.testJarPhase>
<build.copyDependenciesPhase>package</build.copyDependenciesPhase>
</properties>
<dependencies>
@ -69,6 +68,17 @@
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<!--
Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
that the libraries Spark depends on have it available. We'll package the version that Spark
uses (14.0.1), which is not the same as the one the Hadoop dependencies use, but it works.
-->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>${hadoop.deps.scope}</scope>
</dependency>
</dependencies>
<build>
@ -87,75 +97,26 @@
<skip>true</skip>
</configuration>
</plugin>
<!-- zip pyspark archives to run python application on yarn mode -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
<configuration>
<target>
<delete dir="${basedir}/../python/lib/pyspark.zip"/>
<zip destfile="${basedir}/../python/lib/pyspark.zip">
<fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
</zip>
</target>
</configuration>
</plugin>
<!-- Use the shade plugin to create a big JAR with all the dependencies -->
<!-- zip pyspark archives to run python application on yarn mode -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
<outputFile>${spark.jar}</outputFile>
<artifactSet>
<includes>
<include>*:*</include>
</includes>
</artifactSet>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>org/datanucleus/**</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
<resource>log4j.properties</resource>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"/>
</transformers>
</configuration>
</execution>
</executions>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
<configuration>
<target>
<delete dir="${basedir}/../python/lib/pyspark.zip"/>
<zip destfile="${basedir}/../python/lib/pyspark.zip">
<fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
</zip>
</target>
</configuration>
</plugin>
</plugins>
</build>


@ -36,21 +36,20 @@ else
fi
# Find Spark jars.
# TODO: change the directory name when Spark jars move from "lib".
if [ -f "${SPARK_HOME}/RELEASE" ]; then
SPARK_JARS_DIR="${SPARK_HOME}/lib"
SPARK_JARS_DIR="${SPARK_HOME}/jars"
else
SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars"
fi
if [ ! -d "$SPARK_JARS_DIR" ]; then
if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
echo "You need to build Spark before running this program." 1>&2
exit 1
else
LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
fi
LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
# Add the launcher build dir to the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"


@ -29,11 +29,10 @@ if "x%1"=="x" (
)
rem Find Spark jars.
rem TODO: change the directory name when Spark jars move from "lib".
if exist "%SPARK_HOME%\RELEASE" (
set SPARK_JARS_DIR="%SPARK_HOME%\lib"
set SPARK_JARS_DIR="%SPARK_HOME%\jars"
) else (
set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%"
set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars"
)
if not exist "%SPARK_JARS_DIR%"\ (


@ -1121,9 +1121,9 @@ private[spark] object Utils extends Logging {
extraEnvironment: Map[String, String] = Map.empty,
redirectStderr: Boolean = true): String = {
val process = executeCommand(command, workingDir, extraEnvironment, redirectStderr)
val output = new StringBuffer
val output = new StringBuilder
val threadName = "read stdout for " + command(0)
def appendToOutput(s: String): Unit = output.append(s)
def appendToOutput(s: String): Unit = output.append(s).append("\n")
val stdoutThread = processStreamByLine(threadName, process.getInputStream, appendToOutput)
val exitCode = process.waitFor()
stdoutThread.join() // Wait for it to finish reading output


@ -201,24 +201,29 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging {
// Make sure only logging errors
val logger = Logger.getRootLogger
val oldLogLevel = logger.getLevel
logger.setLevel(Level.ERROR)
logger.addAppender(mockAppender)
try {
logger.addAppender(mockAppender)
val testOutputStream = new PipedOutputStream()
val testInputStream = new PipedInputStream(testOutputStream)
val testOutputStream = new PipedOutputStream()
val testInputStream = new PipedInputStream(testOutputStream)
// Close the stream before appender tries to read will cause an IOException
testInputStream.close()
testOutputStream.close()
val appender = FileAppender(testInputStream, testFile, new SparkConf)
// Close the stream before appender tries to read will cause an IOException
testInputStream.close()
testOutputStream.close()
val appender = FileAppender(testInputStream, testFile, new SparkConf)
appender.awaitTermination()
appender.awaitTermination()
// If InputStream was closed without first stopping the appender, an exception will be logged
verify(mockAppender, atLeast(1)).doAppend(loggingEventCaptor.capture)
val loggingEvent = loggingEventCaptor.getValue
assert(loggingEvent.getThrowableInformation !== null)
assert(loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException])
// If InputStream was closed without first stopping the appender, an exception will be logged
verify(mockAppender, atLeast(1)).doAppend(loggingEventCaptor.capture)
val loggingEvent = loggingEventCaptor.getValue
assert(loggingEvent.getThrowableInformation !== null)
assert(loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException])
} finally {
logger.setLevel(oldLogLevel)
}
}
test("file appender async close stream gracefully") {
@ -228,30 +233,35 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging {
// Make sure only logging errors
val logger = Logger.getRootLogger
val oldLogLevel = logger.getLevel
logger.setLevel(Level.ERROR)
logger.addAppender(mockAppender)
try {
logger.addAppender(mockAppender)
val testOutputStream = new PipedOutputStream()
val testInputStream = new PipedInputStream(testOutputStream) with LatchedInputStream
val testOutputStream = new PipedOutputStream()
val testInputStream = new PipedInputStream(testOutputStream) with LatchedInputStream
// Close the stream before appender tries to read will cause an IOException
testInputStream.close()
testOutputStream.close()
val appender = FileAppender(testInputStream, testFile, new SparkConf)
// Close the stream before appender tries to read will cause an IOException
testInputStream.close()
testOutputStream.close()
val appender = FileAppender(testInputStream, testFile, new SparkConf)
// Stop the appender before an IOException is called during read
testInputStream.latchReadStarted.await()
appender.stop()
testInputStream.latchReadProceed.countDown()
// Stop the appender before an IOException is called during read
testInputStream.latchReadStarted.await()
appender.stop()
testInputStream.latchReadProceed.countDown()
appender.awaitTermination()
appender.awaitTermination()
// Make sure no IOException errors have been logged as a result of appender closing gracefully
verify(mockAppender, atLeast(0)).doAppend(loggingEventCaptor.capture)
import scala.collection.JavaConverters._
loggingEventCaptor.getAllValues.asScala.foreach { loggingEvent =>
assert(loggingEvent.getThrowableInformation === null
|| !loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException])
// Make sure no IOException errors have been logged as a result of appender closing gracefully
verify(mockAppender, atLeast(0)).doAppend(loggingEventCaptor.capture)
import scala.collection.JavaConverters._
loggingEventCaptor.getAllValues.asScala.foreach { loggingEvent =>
assert(loggingEvent.getThrowableInformation === null
|| !loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException])
}
} finally {
logger.setLevel(oldLogLevel)
}
}


@ -12,7 +12,6 @@ asm-3.1.jar
asm-commons-3.1.jar
asm-tree-3.1.jar
avro-1.7.7.jar
avro-ipc-1.7.7-tests.jar
avro-ipc-1.7.7.jar
avro-mapred-1.7.7-hadoop2.jar
bonecp-0.8.0.RELEASE.jar
@ -61,6 +60,7 @@ grizzly-http-2.1.2.jar
grizzly-http-server-2.1.2.jar
grizzly-http-servlet-2.1.2.jar
grizzly-rcm-2.1.2.jar
guava-14.0.1.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.2.0.jar
@ -164,7 +164,6 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
servlet-api-2.5.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
@ -177,7 +176,6 @@ stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-1.5.6.jar
unused-1.0.0.jar
xbean-asm5-shaded-4.4.jar
xmlenc-0.52.jar
xz-1.0.jar


@ -12,7 +12,6 @@ asm-3.1.jar
asm-commons-3.1.jar
asm-tree-3.1.jar
avro-1.7.7.jar
avro-ipc-1.7.7-tests.jar
avro-ipc-1.7.7.jar
avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
@ -56,6 +55,7 @@ eigenbase-properties-1.1.5.jar
geronimo-annotation_1.0_spec-1.1.1.jar
geronimo-jaspic_1.0_spec-1.0.jar
geronimo-jta_1.1_spec-1.1.1.jar
guava-14.0.1.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.3.0.jar
@ -155,7 +155,6 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
servlet-api-2.5.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
@ -168,7 +167,6 @@ stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-1.5.6.jar
unused-1.0.0.jar
xbean-asm5-shaded-4.4.jar
xmlenc-0.52.jar
xz-1.0.jar


@ -12,7 +12,6 @@ asm-3.1.jar
asm-commons-3.1.jar
asm-tree-3.1.jar
avro-1.7.7.jar
avro-ipc-1.7.7-tests.jar
avro-ipc-1.7.7.jar
avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
@ -56,6 +55,7 @@ eigenbase-properties-1.1.5.jar
geronimo-annotation_1.0_spec-1.1.1.jar
geronimo-jaspic_1.0_spec-1.0.jar
geronimo-jta_1.1_spec-1.1.1.jar
guava-14.0.1.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.4.0.jar
@ -156,7 +156,6 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
servlet-api-2.5.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
@ -169,7 +168,6 @@ stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-1.5.6.jar
unused-1.0.0.jar
xbean-asm5-shaded-4.4.jar
xmlenc-0.52.jar
xz-1.0.jar


@ -16,7 +16,6 @@ asm-3.1.jar
asm-commons-3.1.jar
asm-tree-3.1.jar
avro-1.7.7.jar
avro-ipc-1.7.7-tests.jar
avro-ipc-1.7.7.jar
avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
@ -61,6 +60,7 @@ geronimo-annotation_1.0_spec-1.1.1.jar
geronimo-jaspic_1.0_spec-1.0.jar
geronimo-jta_1.1_spec-1.1.1.jar
gson-2.2.4.jar
guava-14.0.1.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.6.0.jar
@ -162,7 +162,6 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
servlet-api-2.5.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
@ -175,7 +174,6 @@ stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-1.5.6.jar
unused-1.0.0.jar
xbean-asm5-shaded-4.4.jar
xercesImpl-2.9.1.jar
xmlenc-0.52.jar


@ -16,7 +16,6 @@ asm-3.1.jar
asm-commons-3.1.jar
asm-tree-3.1.jar
avro-1.7.7.jar
avro-ipc-1.7.7-tests.jar
avro-ipc-1.7.7.jar
avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
@ -61,6 +60,7 @@ geronimo-annotation_1.0_spec-1.1.1.jar
geronimo-jaspic_1.0_spec-1.0.jar
geronimo-jta_1.1_spec-1.1.1.jar
gson-2.2.4.jar
guava-14.0.1.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.7.0.jar
@ -163,7 +163,6 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
servlet-api-2.5.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
@ -176,7 +175,6 @@ stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-1.5.6.jar
unused-1.0.0.jar
xbean-asm5-shaded-4.4.jar
xercesImpl-2.9.1.jar
xmlenc-0.52.jar


@ -160,28 +160,35 @@ echo -e "\$ ${BUILD_COMMAND[@]}\n"
# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/lib"
mkdir -p "$DISTDIR/jars"
echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
echo "Build flags: $@" >> "$DISTDIR/RELEASE"
# Copy jars
cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
# This will fail if the -Pyarn profile is not provided
# In this case, silence the error and ignore the return code of this command
cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"
# Only create the yarn directory if the yarn artifacts were built.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
mkdir "$DISTDIR"/yarn
cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/yarn"
fi
# Copy examples and dependencies
mkdir -p "$DISTDIR/examples/jars"
cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"
# Deduplicate jars that have already been packaged as part of the main Spark dependencies.
for f in "$DISTDIR/examples/jars/"*; do
name=$(basename "$f")
if [ -f "$DISTDIR/jars/$name" ]; then
rm "$DISTDIR/examples/jars/$name"
fi
done
# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/"
if [ "$SPARK_HIVE" == "1" ]; then
cp "$SPARK_HOME"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/"
fi
# Copy license and ASF files
cp "$SPARK_HOME/LICENSE" "$DISTDIR"
cp -r "$SPARK_HOME/licenses" "$DISTDIR"


@ -25,8 +25,8 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
cd "$FWDIR"
SPARK_PROFILES="-Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
TOOLS_CLASSPATH="$(build/sbt "export tools/fullClasspath" | tail -n1)"
OLD_DEPS_CLASSPATH="$(build/sbt $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"
TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)"
OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"
rm -f .generated-mima*
@ -36,7 +36,7 @@ java \
-cp "$TOOLS_CLASSPATH:$OLD_DEPS_CLASSPATH" \
org.apache.spark.tools.GenerateMIMAIgnore
echo -e "q\n" | build/sbt mimaReportBinaryIssues | grep -v -e "info.*Resolving"
echo -e "q\n" | build/sbt -DcopyDependencies=false "$@" mimaReportBinaryIssues | grep -v -e "info.*Resolving"
ret_val=$?
if [ $ret_val != 0 ]; then


@ -350,7 +350,7 @@ def build_spark_sbt(hadoop_version):
def build_spark_assembly_sbt(hadoop_version):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
sbt_goals = ["assembly/assembly"]
sbt_goals = ["assembly/package"]
profiles_and_goals = build_profiles + sbt_goals
print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
" ".join(profiles_and_goals))
@ -371,9 +371,10 @@ def build_apache_spark(build_tool, hadoop_version):
build_spark_sbt(hadoop_version)
def detect_binary_inop_with_mima():
def detect_binary_inop_with_mima(hadoop_version):
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
set_title_and_block("Detecting binary incompatibilities with MiMa", "BLOCK_MIMA")
run_cmd([os.path.join(SPARK_HOME, "dev", "mima")])
run_cmd([os.path.join(SPARK_HOME, "dev", "mima")] + build_profiles)
def run_scala_tests_maven(test_profiles):
@ -571,8 +572,8 @@ def main():
# backwards compatibility checks
if build_tool == "sbt":
# Note: compatibility tests only supported in sbt for now
detect_binary_inop_with_mima()
# Since we did not build assembly/assembly before running dev/mima, we need to
detect_binary_inop_with_mima(hadoop_version)
# Since we did not build assembly/package before running dev/mima, we need to
# do it here because the tests still rely on it; see SPARK-13294 for details.
build_spark_assembly_sbt(hadoop_version)
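
In other words, the sbt path of dev/run-tests now boils down to roughly the following two steps (the profile flags are examples; the real ones come from get_hadoop_profiles and the root module's build profile flags):

# MiMa is invoked with the Hadoop profiles passed through explicitly
dev/mima -Phadoop-2.6 -Pyarn -Phive -Phive-thriftserver

# The assembly module is packaged afterwards, since some tests still rely on it
build/sbt -Phadoop-2.6 -Pyarn -Phive -Phive-thriftserver assembly/package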


@ -1687,12 +1687,7 @@ on all of the worker nodes, as they will need access to the Hive serialization a
(SerDes) in order to access data stored in Hive.
Configuration of Hive is done by placing your `hive-site.xml`, `core-site.xml` (for security configuration),
`hdfs-site.xml` (for HDFS configuration) file in `conf/`. Please note when running
the query on a YARN cluster (`cluster` mode), the `datanucleus` jars under the `lib` directory
and `hive-site.xml` under `conf/` directory need to be available on the driver and all executors launched by the
YARN cluster. The convenient way to do this is adding them through the `--jars` option and `--file` option of the
`spark-submit` command.
`hdfs-site.xml` (for HDFS configuration) file in `conf/`.
<div class="codetabs">


@ -27,13 +27,16 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-examples_2.11</artifactId>
<properties>
<sbt.project.name>examples</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project Examples</name>
<url>http://spark.apache.org/</url>
<properties>
<sbt.project.name>examples</sbt.project.name>
<build.testJarPhase>none</build.testJarPhase>
<build.copyDependenciesPhase>package</build.copyDependenciesPhase>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
@ -75,23 +78,6 @@
<artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-testing-util</artifactId>
<version>${hbase.version}</version>
<scope>${hbase.deps.scope}</scope>
<exclusions>
<exclusion>
<!-- SPARK-4455 -->
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>org.jruby</groupId>
<artifactId>jruby-complete</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-protocol</artifactId>
@ -139,6 +125,10 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
@ -208,13 +198,6 @@
<version>${hbase.version}</version>
<scope>${hbase.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop-compat</artifactId>
<version>${hbase.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
@ -294,17 +277,6 @@
<artifactId>scopt_${scala.binary.version}</artifactId>
<version>3.3.0</version>
</dependency>
<!--
The following dependencies are already present in the Spark assembly, so we want to force
them to be provided.
-->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
@ -325,38 +297,6 @@
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<id>prepare-test-jar</id>
<phase>none</phase>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
<configuration>
<outputDirectory>${jars.target.dir}</outputDirectory>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<includeScope>runtime</includeScope>
<outputDirectory>${jars.target.dir}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<profiles>


@ -144,10 +144,26 @@ abstract class AbstractCommandBuilder {
boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
if (prependClasses || isTesting) {
String scala = getScalaVersion();
List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
"streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
"yarn", "launcher",
"common/network-common", "common/network-shuffle", "common/network-yarn");
List<String> projects = Arrays.asList(
"common/network-common",
"common/network-shuffle",
"common/network-yarn",
"common/sketch",
"common/tags",
"common/unsafe",
"core",
"examples",
"graphx",
"launcher",
"mllib",
"repl",
"sql/catalyst",
"sql/core",
"sql/hive",
"sql/hive-thriftserver",
"streaming",
"yarn"
);
if (prependClasses) {
if (!isTesting) {
System.err.println(
@ -174,31 +190,12 @@ abstract class AbstractCommandBuilder {
// Add Spark jars to the classpath. For the testing case, we rely on the test code to set and
// propagate the test classpath appropriately. For normal invocation, look for the jars
// directory under SPARK_HOME.
String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting);
boolean isTestingSql = "1".equals(getenv("SPARK_SQL_TESTING"));
String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting && !isTestingSql);
if (jarsDir != null) {
addToClassPath(cp, join(File.separator, jarsDir, "*"));
}
// Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
// included in the uber jar as plugin.xml metadata is lost. Both sbt and maven will populate
// "lib_managed/jars/" with the datanucleus jars when Spark is built with Hive
File libdir;
if (new File(sparkHome, "RELEASE").isFile()) {
libdir = new File(sparkHome, "lib");
} else {
libdir = new File(sparkHome, "lib_managed/jars");
}
if (libdir.isDirectory()) {
for (File jar : libdir.listFiles()) {
if (jar.getName().startsWith("datanucleus-")) {
addToClassPath(cp, jar.getAbsolutePath());
}
}
} else {
checkState(isTesting, "Library directory '%s' does not exist.", libdir.getAbsolutePath());
}
addToClassPath(cp, getenv("HADOOP_CONF_DIR"));
addToClassPath(cp, getenv("YARN_CONF_DIR"));
addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH"));


@ -358,12 +358,12 @@ class CommandBuilderUtils {
// TODO: change to the correct directory once the assembly build is changed.
File libdir;
if (new File(sparkHome, "RELEASE").isFile()) {
libdir = new File(sparkHome, "lib");
libdir = new File(sparkHome, "jars");
checkState(!failIfNotFound || libdir.isDirectory(),
"Library directory '%s' does not exist.",
libdir.getAbsolutePath());
} else {
libdir = new File(sparkHome, String.format("assembly/target/scala-%s", scalaVersion));
libdir = new File(sparkHome, String.format("assembly/target/scala-%s/jars", scalaVersion));
if (!libdir.isDirectory()) {
checkState(!failIfNotFound,
"Library directory '%s' does not exist; make sure Spark is built.",


@ -336,6 +336,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
}
private List<String> findExamplesJars() {
boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
List<String> examplesJars = new ArrayList<>();
String sparkHome = getSparkHome();
@ -346,11 +347,15 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
jarsDir = new File(sparkHome,
String.format("examples/target/scala-%s/jars", getScalaVersion()));
}
checkState(jarsDir.isDirectory(), "Examples jars directory '%s' does not exist.",
boolean foundDir = jarsDir.isDirectory();
checkState(isTesting || foundDir, "Examples jars directory '%s' does not exist.",
jarsDir.getAbsolutePath());
for (File f: jarsDir.listFiles()) {
examplesJars.add(f.getAbsolutePath());
if (foundDir) {
for (File f: jarsDir.listFiles()) {
examplesJars.add(f.getAbsolutePath());
}
}
return examplesJars;
}

pom.xml

@ -185,6 +185,10 @@
<!-- Modules that copy jars to the build directory should do so under this location. -->
<jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
<!-- Allow modules to enable / disable certain build plugins easily. -->
<build.testJarPhase>prepare-package</build.testJarPhase>
<build.copyDependenciesPhase>none</build.copyDependenciesPhase>
<!--
Dependency scopes that can be overridden by enabling certain profiles. These profiles are
declared in the projects that build assemblies.
@ -237,15 +241,6 @@
</pluginRepository>
</pluginRepositories>
<dependencies>
<!--
This is a dummy dependency that is used along with the shading plug-in
to create effective poms on publishing (see SPARK-3812).
-->
<dependency>
<groupId>org.spark-project.spark</groupId>
<artifactId>unused</artifactId>
<version>1.0.0</version>
</dependency>
<!--
This is needed by the scalatest plugin, and so is declared here to be available in
all child modules, just as scalatest is run in all children
@ -833,6 +828,14 @@
</exclusion>
</exclusions>
</dependency>
<!-- avro-mapred for some reason depends on avro-ipc's test jar, so undo that. -->
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-ipc</artifactId>
<classifier>tests</classifier>
<version>${avro.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
@ -1521,6 +1524,10 @@
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -1916,6 +1923,7 @@
-->
<SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
<SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
<SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
<SPARK_TESTING>1</SPARK_TESTING>
<JAVA_HOME>${test.java.home}</JAVA_HOME>
</environmentVariables>
@ -1964,6 +1972,7 @@
-->
<SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
<SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
<SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
<SPARK_TESTING>1</SPARK_TESTING>
<JAVA_HOME>${test.java.home}</JAVA_HOME>
</environmentVariables>
@ -2146,6 +2155,7 @@
<version>2.10</version>
<executions>
<execution>
<id>generate-test-classpath</id>
<phase>test-compile</phase>
<goals>
<goal>build-classpath</goal>
@ -2155,6 +2165,17 @@
<outputProperty>test_classpath</outputProperty>
</configuration>
</execution>
<execution>
<id>copy-module-dependencies</id>
<phase>${build.copyDependenciesPhase}</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<includeScope>runtime</includeScope>
<outputDirectory>${jars.target.dir}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
@ -2169,9 +2190,6 @@
<shadedArtifactAttached>false</shadedArtifactAttached>
<artifactSet>
<includes>
<!-- At a minimum we must include this to force effective pom generation -->
<include>org.spark-project.spark:unused</include>
<include>org.eclipse.jetty:jetty-io</include>
<include>org.eclipse.jetty:jetty-http</include>
<include>org.eclipse.jetty:jetty-continuation</include>
@ -2302,7 +2320,7 @@
<executions>
<execution>
<id>prepare-test-jar</id>
<phase>prepare-package</phase>
<phase>${build.testJarPhase}</phase>
<goals>
<goal>test-jar</goal>
</goals>


@ -57,11 +57,12 @@ object BuildCommons {
Seq("yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl",
"docker-integration-tests").map(ProjectRef(buildLocation, _))
val assemblyProjects@Seq(assembly, networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKinesisAslAssembly) =
Seq("assembly", "network-yarn", "streaming-flume-assembly", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
val assemblyProjects@Seq(networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKinesisAslAssembly) =
Seq("network-yarn", "streaming-flume-assembly", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
.map(ProjectRef(buildLocation, _))
val copyJarsProjects@Seq(examples) = Seq("examples").map(ProjectRef(buildLocation, _))
val copyJarsProjects@Seq(assembly, examples) = Seq("assembly", "examples")
.map(ProjectRef(buildLocation, _))
val tools = ProjectRef(buildLocation, "tools")
// Root project.
@ -263,8 +264,14 @@ object SparkBuild extends PomBuild {
/* Unsafe settings */
enable(Unsafe.settings)(unsafe)
/* Set up tasks to copy dependencies during packaging. */
copyJarsProjects.foreach(enable(CopyDependencies.settings))
/*
* Set up tasks to copy dependencies during packaging. This step can be disabled in the command
* line, so that dev/mima can run without trying to copy these files again and potentially
* causing issues.
*/
if (!"false".equals(System.getProperty("copyDependencies"))) {
copyJarsProjects.foreach(enable(CopyDependencies.settings))
}
/* Enable Assembly for all assembly projects */
assemblyProjects.foreach(enable(Assembly.settings))
@ -477,8 +484,6 @@ object Assembly {
val hadoopVersion = taskKey[String]("The version of hadoop that spark is compiled against.")
val deployDatanucleusJars = taskKey[Unit]("Deploy datanucleus jars to the spark/lib_managed/jars directory")
lazy val settings = assemblySettings ++ Seq(
test in assembly := {},
hadoopVersion := {
@ -497,27 +502,13 @@ object Assembly {
s"${mName}-test-${v}.jar"
},
mergeStrategy in assembly := {
case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
case "log4j.properties" => MergeStrategy.discard
case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
case "reference.conf" => MergeStrategy.concat
case _ => MergeStrategy.first
},
deployDatanucleusJars := {
val jars: Seq[File] = (fullClasspath in assembly).value.map(_.data)
.filter(_.getPath.contains("org.datanucleus"))
var libManagedJars = new File(BuildCommons.sparkHome, "lib_managed/jars")
libManagedJars.mkdirs()
jars.foreach { jar =>
val dest = new File(libManagedJars, jar.getName)
if (!dest.exists()) {
Files.copy(jar.toPath, dest.toPath)
}
}
},
assembly <<= assembly.dependsOn(deployDatanucleusJars)
}
)
}
@ -698,6 +689,13 @@ object Java8TestSettings {
object TestSettings {
import BuildCommons._
private val scalaBinaryVersion =
if (System.getProperty("scala-2.10") == "true") {
"2.10"
} else {
"2.11"
}
lazy val settings = Seq (
// Fork new JVMs for tests and set Java options for those
fork := true,
@ -707,6 +705,7 @@ object TestSettings {
"SPARK_DIST_CLASSPATH" ->
(fullClasspath in Test).value.files.map(_.getAbsolutePath).mkString(":").stripSuffix(":"),
"SPARK_PREPEND_CLASSES" -> "1",
"SPARK_SCALA_VERSION" -> scalaBinaryVersion,
"SPARK_TESTING" -> "1",
"JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))),
javaOptions in Test += s"-Djava.io.tmpdir=$testTempDir",
@ -744,7 +743,7 @@ object TestSettings {
// Make sure the test temp directory exists.
resourceGenerators in Test <+= resourceManaged in Test map { outDir: File =>
if (!new File(testTempDir).isDirectory()) {
require(new File(testTempDir).mkdirs())
require(new File(testTempDir).mkdirs(), s"Error creating temp directory $testTempDir.")
}
Seq[File]()
},
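
The copyDependencies switch added above is read as a plain JVM system property, so a build that should skip the dependency-copy step can be invoked the same way dev/mima does (a sketch; the goal shown is just an example):

# Skip copying dependency jars into assembly/target/.../jars and examples/target/.../jars
build/sbt -DcopyDependencies=false package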


@ -1482,7 +1482,7 @@ def search_kafka_assembly_jar():
raise Exception(
("Failed to find Spark Streaming kafka assembly jar in %s. " % kafka_assembly_dir) +
"You need to build Spark with "
"'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or "
"'build/sbt assembly/package streaming-kafka-assembly/assembly' or "
"'build/mvn package' before running this test.")
elif len(jars) > 1:
raise Exception(("Found multiple Spark Streaming Kafka assembly JARs: %s; please "
@ -1548,7 +1548,7 @@ if __name__ == "__main__":
elif are_kinesis_tests_enabled is False:
sys.stderr.write("Skipping all Kinesis Python tests as the optional Kinesis project was "
"not compiled into a JAR. To run these tests, "
"you need to build Spark with 'build/sbt -Pkinesis-asl assembly/assembly "
"you need to build Spark with 'build/sbt -Pkinesis-asl assembly/package "
"streaming-kinesis-asl-assembly/assembly' or "
"'build/mvn -Pkinesis-asl package' before running this test.")
else:
@ -1556,7 +1556,7 @@ if __name__ == "__main__":
("Failed to find Spark Streaming Kinesis assembly jar in %s. "
% kinesis_asl_assembly_dir) +
"You need to build Spark with 'build/sbt -Pkinesis-asl "
"assembly/assembly streaming-kinesis-asl-assembly/assembly'"
"assembly/package streaming-kinesis-asl-assembly/assembly'"
"or 'build/mvn -Pkinesis-asl package' before running this test.")
sys.stderr.write("Running tests: %s \n" % (str(testcases)))


@ -53,11 +53,25 @@ LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log")
FAILURE_REPORTING_LOCK = Lock()
LOGGER = logging.getLogger()
# Find out where the assembly jars are located.
for scala in ["2.11", "2.10"]:
build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala)
if os.path.isdir(build_dir):
SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*")
break
else:
raise Exception("Cannot find assembly build directory, please build Spark first.")
def run_individual_python_test(test_name, pyspark_python):
env = dict(os.environ)
env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python),
'PYSPARK_DRIVER_PYTHON': which(pyspark_python)})
env.update({
'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH,
'SPARK_TESTING': '1',
'SPARK_PREPEND_CLASSES': '1',
'PYSPARK_PYTHON': which(pyspark_python),
'PYSPARK_DRIVER_PYTHON': which(pyspark_python)
})
LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name)
start_time = time.time()
try:


@ -763,11 +763,15 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl
extraEnvironment = Map(
// Disables SPARK_TESTING to exclude log4j.properties in test directories.
"SPARK_TESTING" -> "0",
// But set SPARK_SQL_TESTING to make spark-class happy.
"SPARK_SQL_TESTING" -> "1",
// Points SPARK_PID_DIR to SPARK_HOME, otherwise only 1 Thrift server instance can be
// started at a time, which is not Jenkins friendly.
"SPARK_PID_DIR" -> pidDir.getCanonicalPath),
redirectStderr = true)
logInfo(s"COMMAND: $command")
logInfo(s"OUTPUT: $lines")
lines.split("\n").collectFirst {
case line if line.contains(LOG_FILE_MARK) => new File(line.drop(LOG_FILE_MARK.length))
}.getOrElse {


@ -225,30 +225,6 @@
<argLine>-da -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
</configuration>
</plugin>
<!-- Deploy datanucleus jars to the spark/lib_managed/jars directory -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<!-- basedir is spark/sql/hive/ -->
<outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeGroupIds>org.datanucleus</includeGroupIds>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>


@ -447,9 +447,6 @@ private[spark] class Client(
*
* Note that the archive cannot be a "local" URI. If none of the above settings are found,
* then upload all files found in $SPARK_HOME/jars.
*
* TODO: currently the code looks in $SPARK_HOME/lib while the work to replace assemblies
* with a directory full of jars is ongoing.
*/
val sparkArchive = sparkConf.get(SPARK_ARCHIVE)
if (sparkArchive.isDefined) {


@ -273,7 +273,7 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll
test("distribute local spark jars") {
val temp = Utils.createTempDir()
val jarsDir = new File(temp, "lib")
val jarsDir = new File(temp, "jars")
assert(jarsDir.mkdir())
val jar = TestUtils.createJarWithFiles(Map(), jarsDir)
new FileOutputStream(new File(temp, "RELEASE")).close()