Provide and expose a default Hadoop Configuration.

Any "hadoop.*" system properties will be passed along into configuration.
Stephen Haberman 2013-01-09 17:03:25 -06:00
parent 14972141f9
commit e3861ae395
2 changed files with 21 additions and 4 deletions
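
As a quick illustration of the behaviour described in the commit message: any JVM system property whose name starts with "hadoop." is copied into the shared configuration with the prefix stripped, and io.file.buffer.size is seeded from spark.buffer.size (65536 unless overridden). A minimal sketch against the pre-Apache "spark" package; the property value and app name are made up:

    import spark.SparkContext

    // Hypothetical value, set before the SparkContext is created.
    System.setProperty("hadoop.fs.default.name", "hdfs://namenode:9000")

    val sc = new SparkContext("local", "hadoop-conf-example")

    // The "hadoop." prefix is stripped when the property is copied in.
    assert(sc.hadoopConfiguration.get("fs.default.name") == "hdfs://namenode:9000")

    // Without spark.buffer.size set, io.file.buffer.size falls back to 65536.
    assert(sc.hadoopConfiguration.get("io.file.buffer.size") == "65536")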


@@ -187,6 +187,18 @@ class SparkContext(
 
   private var dagScheduler = new DAGScheduler(taskScheduler)
 
+  /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */
+  val hadoopConfiguration = {
+    val conf = new Configuration()
+    // Copy any "hadoop.foo=bar" system properties into conf as "foo=bar"
+    for (key <- System.getProperties.stringPropertyNames.toArray(new Array[String](0)) if key.startsWith("hadoop.")) {
+      conf.set(key.substring("hadoop.".length), System.getProperty(key))
+    }
+    val bufferSize = System.getProperty("spark.buffer.size", "65536")
+    conf.set("io.file.buffer.size", bufferSize)
+    conf
+  }
+
   // Methods for creating RDDs
 
   /** Distribute a local Scala collection to form an RDD. */
@@ -231,10 +243,8 @@ class SparkContext(
       valueClass: Class[V],
       minSplits: Int = defaultMinSplits
       ) : RDD[(K, V)] = {
-    val conf = new JobConf()
+    val conf = new JobConf(hadoopConfiguration)
     FileInputFormat.setInputPaths(conf, path)
-    val bufferSize = System.getProperty("spark.buffer.size", "65536")
-    conf.set("io.file.buffer.size", bufferSize)
     new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits)
   }
 
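
Because hadoopFile now seeds its JobConf from hadoopConfiguration (the JobConf copy constructor clones the entries), anything set on the shared configuration is picked up when files are read. A sketch under those assumptions; the credential keys and bucket path are placeholders:

    import spark.SparkContext

    val sc = new SparkContext("local", "propagation-example")

    // Placeholder credentials; any Hadoop property would propagate the same way.
    sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", "MY_ACCESS_KEY")
    sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", "MY_SECRET_KEY")

    // textFile goes through hadoopFile, whose JobConf is now copied from
    // hadoopConfiguration, so the settings above are in effect for the read.
    val lines = sc.textFile("s3n://some-bucket/input.txt")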
@@ -276,7 +286,7 @@ class SparkContext(
         fm.erasure.asInstanceOf[Class[F]],
         km.erasure.asInstanceOf[Class[K]],
         vm.erasure.asInstanceOf[Class[V]],
-        new Configuration)
+        hadoopConfiguration)
   }
 
   /**


@@ -355,6 +355,13 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
   def clearFiles() {
     sc.clearFiles()
   }
+
+  /**
+   * Returns the Hadoop configuration used for the Hadoop code (e.g. file systems) we reuse.
+   */
+  def hadoopConfiguration(): Configuration = {
+    sc.hadoopConfiguration
+  }
 }
 
 object JavaSparkContext {
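
On the Java API side, the new getter hands back the same underlying Configuration object, so changes made through it are visible to the wrapped SparkContext. A sketch from Scala against the pre-Apache package layout; the app name and the setting value are illustrative:

    import spark.api.java.JavaSparkContext

    val jsc = new JavaSparkContext("local", "java-api-example")

    // Same Configuration instance as jsc.sc.hadoopConfiguration.
    val conf = jsc.hadoopConfiguration()
    conf.set("io.file.buffer.size", "131072")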