[SPARK-5808] [build] Package pyspark files in sbt assembly.
This turned out to be more complicated than I wanted because the layout of python/ doesn't really follow the usual maven conventions. So some extra code is needed to copy just the right things. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #5461 from vanzin/SPARK-5808 and squashes the following commits: 7153dac [Marcelo Vanzin] Only try to create resource dir if it doesn't already exist. ee90e84 [Marcelo Vanzin] [SPARK-5808] [build] Package pyspark files in sbt assembly.
This commit is contained in:
parent
6adb8bcbf0
commit
65774370a1
|
@@ -15,7 +15,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File
|
||||
import java.io._
|
||||
|
||||
import scala.util.Properties
|
||||
import scala.collection.JavaConversions._
|
||||
|
@@ -166,6 +166,9 @@ object SparkBuild extends PomBuild {
|
|||
/* Enable Assembly for all assembly projects */
|
||||
assemblyProjects.foreach(enable(Assembly.settings))
|
||||
|
||||
/* Package pyspark artifacts in the main assembly. */
|
||||
enable(PySparkAssembly.settings)(assembly)
|
||||
|
||||
/* Enable unidoc only for the root spark project */
|
||||
enable(Unidoc.settings)(spark)
|
||||
|
||||
|
@@ -316,6 +319,7 @@ object Hive {
|
|||
}
|
||||
|
||||
object Assembly {
|
||||
import sbtassembly.AssemblyUtils._
|
||||
import sbtassembly.Plugin._
|
||||
import AssemblyKeys._
|
||||
|
||||
|
@@ -347,6 +351,60 @@ object Assembly {
|
|||
)
|
||||
}
|
||||
|
||||
object PySparkAssembly {
  import sbtassembly.Plugin._
  import AssemblyKeys._

  lazy val settings = Seq(
    unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
    // Rather than listing all of python/ as a resource directory (which would drag unwanted
    // files into the jar), register a resource generator that mirrors only the .py files from
    // python/pyspark into a managed output directory included in the assembly.
    resourceGenerators in Compile <+= resourceManaged in Compile map { managedDir: File =>
      val pysparkTarget = new File(managedDir, "pyspark")
      if (!pysparkTarget.isDirectory()) {
        require(pysparkTarget.mkdirs())
      }

      copy(new File(BuildCommons.sparkHome, "python/pyspark"), pysparkTarget)
    }
  )

  /**
   * Recursively mirror the .py files under `src` into `dst`, preserving the directory
   * structure, and return the files that were written. Entries that are neither
   * directories nor .py files are skipped.
   */
  private def copy(src: File, dst: File): Seq[File] = {
    src.listFiles().flatMap { entry =>
      val target = new File(dst, entry.getName())
      if (entry.isDirectory()) {
        target.mkdir()
        copy(entry, target)
      } else if (entry.getName().endsWith(".py")) {
        val input = new FileInputStream(entry)
        try {
          val output = new FileOutputStream(target)
          try {
            // Stream the file across in 1 KB chunks; read() yields -1 at end of stream.
            val buffer = new Array[Byte](1024)
            Iterator.continually(input.read(buffer)).takeWhile(_ >= 0).foreach { count =>
              if (count > 0) {
                output.write(buffer, 0, count)
              }
            }
            Seq(target)
          } finally {
            output.close()
          }
        } finally {
          input.close()
        }
      } else {
        Seq.empty
      }
    }
  }
}
|
||||
|
||||
object Unidoc {
|
||||
|
||||
import BuildCommons._
|
||||
|
|
Loading…
Reference in a new issue