[SPARK-27610][YARN] Shade netty native libraries

## What changes were proposed in this pull request?

Fixed the `spark-<version>-yarn-shuffle.jar` artifact packaging to shade the native netty libraries:
- shade the `META-INF/native/libnetty_*` native libraries when packaging
the yarn shuffle service jar. This is required because the netty library loader
derives the native library name from the shaded package name.
- updated the `org/spark_project` shade package prefix to `org/sparkproject`
(i.e. removed underscore) as the former breaks the netty native lib loading.

Without these changes, the yarn external shuffle service fails
when `spark.shuffle.io.mode=EPOLL`.

## How was this patch tested?
Manual tests

Closes #24502 from amuraru/SPARK-27610_master.

Authored-by: Adi Muraru <amuraru@adobe.com>
Signed-off-by: Marcelo Vanzin <vanzin@cloudera.com>
This commit is contained in:
Adi Muraru 2019-05-07 10:47:36 -07:00 committed by Marcelo Vanzin
parent d124ce9c7e
commit 8ef4da753d
18 changed files with 68 additions and 21 deletions

View file

@ -35,7 +35,7 @@
<!-- Make sure all Hadoop dependencies are provided to avoid repackaging. -->
<hadoop.deps.scope>provided</hadoop.deps.scope>
<shuffle.jar>${project.build.directory}/scala-${scala.binary.version}/spark-${project.version}-yarn-shuffle.jar</shuffle.jar>
<shade>org/spark_project/</shade>
<shade>org/sparkproject/</shade>
</properties>
<dependencies>
@ -128,6 +128,50 @@
</execution>
</executions>
</plugin>
<!-- shade the native netty libs as well -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>regex-property</id>
<goals>
<goal>regex-property</goal>
</goals>
<configuration>
<name>spark.shade.native.packageName</name>
<value>${spark.shade.packageName}</value>
<regex>\.</regex>
<replacement>_</replacement>
<failIfNoMatch>true</failIfNoMatch>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>unpack</id>
<phase>package</phase>
<configuration>
<target>
<echo message="Shade netty native libraries to ${spark.shade.native.packageName}" />
<unzip src="${shuffle.jar}" dest="${project.build.directory}/exploded/" />
<move file="${project.build.directory}/exploded/META-INF/native/libnetty_transport_native_epoll_x86_64.so"
tofile="${project.build.directory}/exploded/META-INF/native/lib${spark.shade.native.packageName}_netty_transport_native_epoll_x86_64.so" />
<move file="${project.build.directory}/exploded/META-INF/native/libnetty_transport_native_kqueue_x86_64.jnilib"
tofile="${project.build.directory}/exploded/META-INF/native/lib${spark.shade.native.packageName}_netty_transport_native_kqueue_x86_64.jnilib" />
<jar destfile="${shuffle.jar}" basedir="${project.build.directory}/exploded" />
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- probes to validate that those dependencies which must be shaded are -->
<plugin>

View file

@ -28,8 +28,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
log4j.logger.org.apache.spark.repl.Main=WARN
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.sparkproject.jetty=WARN
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR

View file

@ -28,8 +28,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
log4j.logger.org.apache.spark.repl.Main=WARN
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.sparkproject.jetty=WARN
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

View file

@ -33,4 +33,4 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%t: %m%n
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -31,7 +31,7 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.sparkproject.jetty=WARN
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

View file

@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -34,4 +34,4 @@ log4j.logger.org.apache.spark.launcher.app.outputredirtest=INFO, outputredirtest
log4j.logger.org.apache.spark.launcher.app.outputredirtest.additivity=false
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -24,5 +24,5 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -202,7 +202,7 @@
<test.include.tags></test.include.tags>
<!-- Package to use when relocating shaded classes. -->
<spark.shade.packageName>org.spark_project</spark.shade.packageName>
<spark.shade.packageName>org.sparkproject</spark.shade.packageName>
<!-- Modules that copy jars to the build directory should do so under this location. -->
<jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>

View file

@ -441,7 +441,9 @@ object MimaExcludes {
// [SPARK-15526][ML][FOLLOWUP] Make JPMML provided scope to avoid including unshaded JARs
(problem: Problem) => problem match {
case MissingClassProblem(cls) =>
!cls.fullName.startsWith("org.spark_project.jpmml") &&
!cls.fullName.startsWith("org.sparkproject.jpmml") &&
!cls.fullName.startsWith("org.sparkproject.dmg.pmml") &&
!cls.fullName.startsWith("org.spark_project.jpmml") &&
!cls.fullName.startsWith("org.spark_project.dmg.pmml")
case _ => true
}
@ -716,6 +718,7 @@ object MimaExcludes {
ProblemFilters.exclude[Problem]("org.apache.spark.rpc.*"),
ProblemFilters.exclude[Problem]("org.spark-project.jetty.*"),
ProblemFilters.exclude[Problem]("org.spark_project.jetty.*"),
ProblemFilters.exclude[Problem]("org.sparkproject.jetty.*"),
ProblemFilters.exclude[Problem]("org.apache.spark.internal.*"),
ProblemFilters.exclude[Problem]("org.apache.spark.unused.*"),
ProblemFilters.exclude[Problem]("org.apache.spark.unsafe.*"),

View file

@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -28,4 +28,4 @@ log4j.logger.com.sun.jersey=WARN
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.mortbay=WARN
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -28,4 +28,4 @@ log4j.logger.com.sun.jersey=WARN
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.mortbay=WARN
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -28,4 +28,4 @@ log4j.logger.com.sun.jersey=WARN
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.mortbay=WARN
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -52,7 +52,7 @@ abstract class BaseYarnClusterSuite
|log4j.logger.org.apache.hadoop=WARN
|log4j.logger.org.eclipse.jetty=WARN
|log4j.logger.org.mortbay=WARN
|log4j.logger.org.spark_project.jetty=WARN
|log4j.logger.org.sparkproject.jetty=WARN
""".stripMargin
private var yarnCluster: MiniYARNCluster = _

View file

@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

View file

@ -24,5 +24,5 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN