From ba78383bace52b13ee931c6f2df445f721d5080a Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 30 Aug 2014 16:58:17 -0700 Subject: [PATCH] SPARK-3318: Documentation update in addFile on how to use SparkFiles.get Rather than specifying the path to SparkFiles we need to use the filename. Author: Holden Karau Closes #2210 from holdenk/SPARK-3318-documentation-for-addfiles-should-say-to-use-file-not-path and squashes the following commits: a25d27a [Holden Karau] Update the JavaSparkContext addFile method to be clear about using fileName with SparkFiles as well 0ebcb05 [Holden Karau] Documentation update in addFile on how to use SparkFiles.get to specify filename rather than path --- core/src/main/scala/org/apache/spark/SparkContext.scala | 3 +-- .../scala/org/apache/spark/api/java/JavaSparkContext.scala | 2 +- python/pyspark/context.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index a80b3cce60..cb4fb7cfbd 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -796,7 +796,7 @@ class SparkContext(config: SparkConf) extends Logging { * Add a file to be downloaded with this Spark job on every node. * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, - * use `SparkFiles.get(path)` to find its download location. + * use `SparkFiles.get(fileName)` to find its download location. */ def addFile(path: String) { val uri = new URI(path) @@ -1619,4 +1619,3 @@ private[spark] class WritableConverter[T]( val writableClass: ClassTag[T] => Class[_ <: Writable], val convert: Writable => T) extends Serializable - diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index e0a4815940..8e178bc848 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -545,7 +545,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork * Add a file to be downloaded with this Spark job on every node. * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, - * use `SparkFiles.get(path)` to find its download location. + * use `SparkFiles.get(fileName)` to find its download location. */ def addFile(path: String) { sc.addFile(path) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 82f76de31a..6e4fdaa6ee 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -606,8 +606,8 @@ class SparkContext(object): FTP URI. To access the file in Spark jobs, use - L{SparkFiles.get(path)} to find its - download location. + L{SparkFiles.get(fileName)} with the + filename to find its download location. >>> from pyspark import SparkFiles >>> path = os.path.join(tempdir, "test.txt")