Merge branch 'master' into mos-shuffle-tracked
Conflicts: .gitignore core/src/main/scala/spark/LocalFileShuffle.scala src/scala/spark/BasicLocalFileShuffle.scala src/scala/spark/Broadcast.scala src/scala/spark/LocalFileShuffle.scala
This commit is contained in:
commit
ac7e066383
23
.gitignore
vendored
23
.gitignore
vendored
|
@ -1,7 +1,26 @@
|
|||
*~
|
||||
*.swp
|
||||
build
|
||||
work
|
||||
*.iml
|
||||
.idea/
|
||||
/build/
|
||||
work/
|
||||
out/
|
||||
.DS_Store
|
||||
third_party/libmesos.so
|
||||
third_party/libmesos.dylib
|
||||
conf/java-opts
|
||||
conf/spark-env.sh
|
||||
conf/log4j.properties
|
||||
target/
|
||||
reports/
|
||||
.project
|
||||
.classpath
|
||||
.scala_dependencies
|
||||
lib_managed/
|
||||
src_managed/
|
||||
project/boot/
|
||||
project/plugins/project/build.properties
|
||||
project/build/target/
|
||||
project/plugins/target/
|
||||
project/plugins/lib_managed/
|
||||
project/plugins/src_managed/
|
||||
|
|
27
LICENSE
Normal file
27
LICENSE
Normal file
|
@ -0,0 +1,27 @@
|
|||
Copyright (c) 2010, Regents of the University of California.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of California, Berkeley nor the
|
||||
names of its contributors may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
79
Makefile
79
Makefile
|
@ -1,79 +0,0 @@
|
|||
# Build script for Spark: compiles the Java and Scala sources into
# build/classes, packages jars, and installs default configuration files.
#
# Tunable from the environment or the command line:
#   USE_FSC=1    compile with the fsc compiler daemon instead of scalac
#   SCALA_HOME   Scala installation to take the compiler from (else use $PATH)

EMPTY =
SPACE = $(EMPTY) $(EMPTY)

# Build up classpath by concatenating some strings
JARS = third_party/mesos.jar
JARS += third_party/asm-3.2/lib/all/asm-all-3.2.jar
JARS += third_party/colt.jar
JARS += third_party/guava-r07/guava-r07.jar
JARS += third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
JARS += third_party/hadoop-0.20.0/lib/commons-logging-1.0.4.jar
JARS += third_party/scalatest-1.2/scalatest-1.2.jar
JARS += third_party/scalacheck_2.8.0-1.7.jar
JARS += third_party/jetty-7.1.6.v20100715/jetty-server-7.1.6.v20100715.jar
JARS += third_party/jetty-7.1.6.v20100715/servlet-api-2.5.jar
JARS += third_party/apache-log4j-1.2.16/log4j-1.2.16.jar
JARS += third_party/slf4j-1.6.1/slf4j-api-1.6.1.jar
JARS += third_party/slf4j-1.6.1/slf4j-log4j12-1.6.1.jar
# Join the space-separated jar list with ':' to form a JVM classpath.
CLASSPATH = $(subst $(SPACE),:,$(JARS))

# Left as shell globs on purpose: they are expanded by the shell when the
# compiler recipe runs, not by make.
SCALA_SOURCES = src/examples/*.scala src/scala/spark/*.scala src/scala/spark/repl/*.scala
SCALA_SOURCES += src/test/spark/*.scala src/test/spark/repl/*.scala

# Simple assignment so the wildcard is evaluated once, at parse time.
JAVA_SOURCES := $(wildcard src/java/spark/compress/lzf/*.java)

ifeq ($(USE_FSC),1)
COMPILER_NAME = fsc
else
COMPILER_NAME = scalac
endif

ifeq ($(SCALA_HOME),)
COMPILER = $(COMPILER_NAME)
else
COMPILER = $(SCALA_HOME)/bin/$(COMPILER_NAME)
endif

CONF_FILES = conf/spark-env.sh conf/log4j.properties conf/java-opts

all: scala java conf-files

build/classes:
	mkdir -p $@

# The output directory is an order-only prerequisite: it must exist, but its
# timestamp (which changes whenever a class file is written) is irrelevant.
scala: java | build/classes
	$(COMPILER) -d build/classes -classpath build/classes:$(CLASSPATH) $(SCALA_SOURCES)

java: $(JAVA_SOURCES) | build/classes
	javac -d build/classes $(JAVA_SOURCES)

native: java
	$(MAKE) -C src/native

jar: build/spark.jar build/spark-dep.jar

dep-jar: build/spark-dep.jar

build/spark.jar: scala java
	jar cf $@ -C build/classes spark

# Unpack every third-party jar into build/dep and re-pack them as one jar.
# The loop aborts on the first jar that fails to extract instead of silently
# producing an incomplete archive.
build/spark-dep.jar:
	mkdir -p build/dep
	cd build/dep && for i in $(JARS); do jar xf ../../$$i || exit 1; done
	jar cf $@ -C build/dep .

conf-files: $(CONF_FILES)

# Each config file is created from its .template only if it does not exist
# yet (order-only prerequisite), so local edits are never overwritten.
$(CONF_FILES): %: | %.template
	cp $@.template $@

test: all
	./alltests

default: all

clean:
	$(MAKE) -C src/native clean
	rm -rf build

# Special target names are case-sensitive: it must be .PHONY, not .phony.
.PHONY: default all clean scala java native jar dep-jar conf-files test
|
28
README
28
README
|
@ -1,24 +1,32 @@
|
|||
ONLINE DOCUMENTATION
|
||||
|
||||
You can find the latest Spark documentation, including a programming guide,
|
||||
on the project wiki at http://github.com/mesos/spark/wiki. This file only
|
||||
contains basic setup instructions.
|
||||
|
||||
|
||||
|
||||
BUILDING
|
||||
|
||||
Spark requires Scala 2.8. This version has been tested with 2.8.0.final.
|
||||
Spark requires Scala 2.8. This version has been tested with 2.8.1.final.
|
||||
|
||||
To build and run Spark, you will need to have Scala's bin in your $PATH,
|
||||
or you will need to set the SCALA_HOME environment variable to point
|
||||
to where you've installed Scala. Scala must be accessible through one
|
||||
of these methods on Mesos slave nodes as well as on the master.
|
||||
The project is built using Simple Build Tool (SBT), which is packaged with it.
|
||||
To build Spark and its example programs, run sbt/sbt compile.
|
||||
|
||||
To build Spark and the example programs, run make.
|
||||
To run Spark, you will need to have Scala's bin in your $PATH, or you
|
||||
will need to set the SCALA_HOME environment variable to point to where
|
||||
you've installed Scala. Scala must be accessible through one of these
|
||||
methods on Mesos slave nodes as well as on the master.
|
||||
|
||||
To run one of the examples, use ./run <class> <params>. For example,
|
||||
./run SparkLR will run the Logistic Regression example. Each of the
|
||||
example programs prints usage help if no params are given.
|
||||
./run spark.examples.SparkLR will run the Logistic Regression example.
|
||||
Each of the example programs prints usage help if no params are given.
|
||||
|
||||
All of the Spark samples take a <host> parameter that is the Mesos master
|
||||
to connect to. This can be a Mesos URL, or "local" to run locally with one
|
||||
thread, or "local[N]" to run locally with N threads.
|
||||
|
||||
Tip: If you are building Spark and examples repeatedly, export USE_FSC=1
|
||||
to have the Makefile use the fsc compiler daemon instead of scalac.
|
||||
|
||||
|
||||
CONFIGURATION
|
||||
|
||||
|
|
11
alltests
11
alltests
|
@ -1,11 +0,0 @@
|
|||
#!/bin/bash
# Runs the ScalaTest suite against the compiled classes in build/classes and
# writes the results to build/test_results. Any command-line arguments are
# forwarded unchanged to org.scalatest.tools.Runner.

# Directory containing this script (quoted so paths with spaces survive).
FWDIR="$(dirname "$0")"

# Default SPARK_MEM to 500m when the caller has not set it.
if [ -z "$SPARK_MEM" ]; then
  export SPARK_MEM=500m
fi

# Start from an empty results directory so stale reports are not mixed in.
RESULTS_DIR="$FWDIR/build/test_results"
if [ -d "$RESULTS_DIR" ]; then
  rm -r "$RESULTS_DIR"
fi
mkdir -p "$RESULTS_DIR"

# "$@" (quoted) keeps each forwarded argument intact even if it has spaces.
"$FWDIR/run" org.scalatest.tools.Runner -p "$FWDIR/build/classes" -u "$RESULTS_DIR" -o "$@"
|
|
@ -1,4 +1,4 @@
|
|||
-Dspark.shuffle.class=spark.TrackedCustomBlockedInMemoryShuffle
|
||||
-Dspark.shuffle.class=spark.CustomBlockedInMemoryShuffle
|
||||
-Dspark.shuffle.masterHostAddress=127.0.0.1
|
||||
-Dspark.shuffle.masterTrackerPort=22222
|
||||
-Dspark.shuffle.trackerStrategy=spark.BalanceRemainingShuffleTrackerStrategy
|
||||
|
|
|
@ -10,4 +10,4 @@
|
|||
# be in the same format as the JVM's -Xmx option, e.g. 300m or 1g).
|
||||
# - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.
|
||||
|
||||
MESOS_HOME=/home/mosharaf/Work/mesos
|
||||
MESOS_HOME=/Users/mosharaf/Work/mesos
|
||||
|
|
11
core/lib/compress-lzf-0.6.0/LICENSE
Normal file
11
core/lib/compress-lzf-0.6.0/LICENSE
Normal file
|
@ -0,0 +1,11 @@
|
|||
Copyright 2009-2010 Ning, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
use this file except in compliance with the License. You may obtain a copy of
|
||||
the License at http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
License for the specific language governing permissions and limitations under
|
||||
the License.
|
BIN
core/lib/compress-lzf-0.6.0/compress-lzf-0.6.0.jar
Normal file
BIN
core/lib/compress-lzf-0.6.0/compress-lzf-0.6.0.jar
Normal file
Binary file not shown.
|
@ -1,5 +1,387 @@
|
|||
Hadoop Change Log
|
||||
|
||||
Release 0.20.3 - Unreleased
|
||||
|
||||
Release 0.20.2 - 2010-2-19
|
||||
|
||||
NEW FEATURES
|
||||
|
||||
HADOOP-6218. Adds a feature where TFile can be split by Record
|
||||
Sequence number. (Hong Tang and Raghu Angadi via ddas)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
MAPREDUCE-112. Add counters for reduce input, output records to the new API.
|
||||
(Jothi Padmanabhan via cdouglas)
|
||||
|
||||
HADOOP-6231. Allow caching of filesystem instances to be disabled on a
|
||||
per-instance basis (Tom White and Ben Slusky via mahadev)
|
||||
|
||||
MAPREDUCE-826. harchive doesn't use ToolRunner / harchive returns 0 even
|
||||
if the job fails with exception (koji via mahadev)
|
||||
|
||||
MAPREDUCE-979. Fixed JobConf APIs related to memory parameters to return
|
||||
values of new configuration variables when deprecated variables are
|
||||
disabled. (Sreekanth Ramakrishnan via yhemanth)
|
||||
|
||||
HDFS-686. NullPointerException is thrown while merging edit log and image.
|
||||
(hairong)
|
||||
|
||||
HDFS-677. Rename failure when both source and destination quota exceeds
|
||||
results in deletion of source. (suresh)
|
||||
|
||||
HDFS-709. Fix TestDFSShell failure due to rename bug introduced by
|
||||
HDFS-677. (suresh)
|
||||
|
||||
HDFS-579. Fix DfsTask to follow the semantics of 0.19, regarding non-zero
|
||||
return values as failures. (Christian Kunz via cdouglas)
|
||||
|
||||
MAPREDUCE-1070. Prevent a deadlock in the fair scheduler servlet.
|
||||
(Todd Lipcon via cdouglas)
|
||||
|
||||
HADOOP-5759. Fix for IllegalArgumentException when CombineFileInputFormat
|
||||
is used as job InputFormat. (Amareshwari Sriramadasu via zshao)
|
||||
|
||||
HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte
|
||||
count at the start of each block in Hadoop archives. (Ben Slusky, Tom
|
||||
White, and Mahadev Konar via cdouglas)
|
||||
|
||||
HDFS-723. Fix deadlock in DFSClient#DFSOutputStream. (hairong)
|
||||
|
||||
HDFS-732. DFSClient.DFSOutputStream.close() should throw an exception if
|
||||
the stream cannot be closed successfully. (szetszwo)
|
||||
|
||||
MAPREDUCE-1163. Remove unused, hard-coded paths from libhdfs. (Allen
|
||||
Wittenauer via cdouglas)
|
||||
|
||||
HDFS-761. Fix failure to process rename operation from edits log due to
|
||||
quota verification. (suresh)
|
||||
|
||||
MAPREDUCE-623. Resolve javac warnings in mapreduce. (Jothi Padmanabhan
|
||||
via sharad)
|
||||
|
||||
HADOOP-6575. Remove call to fault injection tests not present in 0.20.
|
||||
(cdouglas)
|
||||
|
||||
HADOOP-6576. Fix streaming test failures on 0.20. (Todd Lipcon via cdouglas)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
HADOOP-5611. Fix C++ libraries to build on Debian Lenny. (Todd Lipcon
|
||||
via tomwhite)
|
||||
|
||||
MAPREDUCE-1068. Fix streaming job to show proper message if file is
|
||||
not present. (Amareshwari Sriramadasu via sharad)
|
||||
|
||||
HDFS-596. Fix memory leak in hdfsFreeFileInfo() for libhdfs.
|
||||
(Zhang Bingjun via dhruba)
|
||||
|
||||
MAPREDUCE-1147. Add map output counters to new API. (Amar Kamat via
|
||||
cdouglas)
|
||||
|
||||
HADOOP-6269. Fix threading issue with defaultResource in Configuration.
|
||||
(Sreekanth Ramakrishnan via cdouglas)
|
||||
|
||||
MAPREDUCE-1182. Fix overflow in reduce causing allocations to exceed the
|
||||
configured threshold. (cdouglas)
|
||||
|
||||
HADOOP-6386. NameNode's HttpServer can't instantiate InetSocketAddress:
|
||||
IllegalArgumentException is thrown. (cos)
|
||||
|
||||
HDFS-185. Disallow chown, chgrp, chmod, setQuota, and setSpaceQuota when
|
||||
name-node is in safemode. (Ravi Phulari via shv)
|
||||
|
||||
HADOOP-6428. HttpServer sleeps with negative values (cos)
|
||||
|
||||
HADOOP-5623. Fixes a problem to do with status messages getting overwritten
|
||||
in streaming jobs. (Rick Cox and Jothi Padmanabhan via tomwhite)
|
||||
|
||||
HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
|
||||
GzipCodec. (Aaron Kimball via cdouglas)
|
||||
|
||||
HDFS-187. Initialize secondary namenode http address in TestStartup.
|
||||
(Todd Lipcon via szetszwo)
|
||||
|
||||
MAPREDUCE-433. Use more reliable counters in TestReduceFetch. (cdouglas)
|
||||
|
||||
HDFS-792. DFSClient 0.20.1 is incompatible with HDFS 0.20.2.
|
||||
(Todd Lipcon via hairong)
|
||||
|
||||
HADOOP-6498. IPC client bug may cause rpc call hang. (Ruyue Ma and
|
||||
hairong via hairong)
|
||||
|
||||
HADOOP-6596. Failing tests prevent the rest of test targets from
|
||||
execution. (cos)
|
||||
|
||||
HADOOP-6524. Contrib tests are failing Clover'ed build. (cos)
|
||||
|
||||
HDFS-919. Create test to validate the BlocksVerified metric (Gary Murry
|
||||
via cos)
|
||||
|
||||
HDFS-907. Add tests for getBlockLocations and totalLoad metrics.
|
||||
(Ravi Phulari via cos)
|
||||
|
||||
MAPREDUCE-1251. c++ utils doesn't compile. (Eli Collins via tomwhite)
|
||||
|
||||
HADOOP-5612. Some c++ scripts are not chmodded before ant execution.
|
||||
(Todd Lipcon via tomwhite)
|
||||
|
||||
Release 0.20.1 - 2009-09-01
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
||||
HADOOP-5726. Remove pre-emption from capacity scheduler code base.
|
||||
(Rahul Kumar Singh via yhemanth)
|
||||
|
||||
HADOOP-5881. Simplify memory monitoring and scheduling related
|
||||
configuration. (Vinod Kumar Vavilapalli via yhemanth)
|
||||
|
||||
NEW FEATURES
|
||||
|
||||
HADOOP-6080. Introduce -skipTrash option to rm and rmr.
|
||||
(Jakob Homan via shv)
|
||||
|
||||
HADOOP-3315. Add a new, binary file format, TFile. (Hong Tang via cdouglas)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
HADOOP-5711. Change Namenode file close log to info. (szetszwo)
|
||||
|
||||
HADOOP-5736. Update the capacity scheduler documentation for features
|
||||
like memory based scheduling, job initialization and removal of pre-emption.
|
||||
(Sreekanth Ramakrishnan via yhemanth)
|
||||
|
||||
HADOOP-4674. Fix fs help messages for -test, -text, -tail, -stat
|
||||
and -touchz options. (Ravi Phulari via szetszwo)
|
||||
|
||||
HADOOP-4372. Improves the way history filenames are obtained and manipulated.
|
||||
(Amar Kamat via ddas)
|
||||
|
||||
HADOOP-5897. Add name-node metrics to capture java heap usage.
|
||||
(Suresh Srinivas via shv)
|
||||
|
||||
HDFS-438. Improve help message for space quota command. (Raghu Angadi)
|
||||
|
||||
MAPREDUCE-767. Remove the dependence on the CLI 2.0 snapshot.
|
||||
(Amar Kamat via ddas)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-5691. Makes org.apache.hadoop.mapreduce.Reducer concrete class
|
||||
instead of abstract. (Amareshwari Sriramadasu via sharad)
|
||||
|
||||
HADOOP-5646. Fixes a problem in TestQueueCapacities.
|
||||
(Vinod Kumar Vavilapalli via ddas)
|
||||
|
||||
HADOOP-5655. TestMRServerPorts fails on java.net.BindException. (Devaraj
|
||||
Das via hairong)
|
||||
|
||||
HADOOP-5654. TestReplicationPolicy.<init> fails on java.net.BindException.
|
||||
(hairong)
|
||||
|
||||
HADOOP-5688. Fix HftpFileSystem checksum path construction. (Tsz Wo
|
||||
(Nicholas) Sze via cdouglas)
|
||||
|
||||
HADOOP-5213. Fix Null pointer exception caused when bzip2compression
|
||||
was used and user closed an output stream without writing any data.
|
||||
(Zheng Shao via dhruba)
|
||||
|
||||
HADOOP-5718. Remove the check for the default queue in capacity scheduler.
|
||||
(Sreekanth Ramakrishnan via yhemanth)
|
||||
|
||||
HADOOP-5719. Remove jobs that failed initialization from the waiting queue
|
||||
in the capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
|
||||
|
||||
HADOOP-4744. Attaching another fix to the jetty port issue. The TaskTracker
|
||||
kills itself if it ever discovers that the port to which jetty is actually
|
||||
bound is invalid (-1). (ddas)
|
||||
|
||||
HADOOP-5349. Fixes a problem in LocalDirAllocator to check for the return
|
||||
path value that is returned for the case where the file we want to write
|
||||
is of an unknown size. (Vinod Kumar Vavilapalli via ddas)
|
||||
|
||||
HADOOP-5636. Prevents a job from going to RUNNING state after it has been
|
||||
KILLED (this used to happen when the SetupTask would come back with a
|
||||
success after the job has been killed). (Amar Kamat via ddas)
|
||||
|
||||
HADOOP-5641. Fix a NullPointerException in capacity scheduler's memory
|
||||
based scheduling code when jobs get retired. (yhemanth)
|
||||
|
||||
HADOOP-5828. Use absolute path for mapred.local.dir of JobTracker in
|
||||
MiniMRCluster. (yhemanth)
|
||||
|
||||
HADOOP-4981. Fix capacity scheduler to schedule speculative tasks
|
||||
correctly in the presence of High RAM jobs.
|
||||
(Sreekanth Ramakrishnan via yhemanth)
|
||||
|
||||
HADOOP-5210. Solves a problem in the progress report of the reduce task.
|
||||
(Ravi Gummadi via ddas)
|
||||
|
||||
HADOOP-5850. Fixes a problem to do with not being able to run jobs with
|
||||
0 maps/reduces. (Vinod K V via ddas)
|
||||
|
||||
HADOOP-5728. Fixed FSEditLog.printStatistics IndexOutOfBoundsException.
|
||||
(Wang Xu via johan)
|
||||
|
||||
HADOOP-4626. Correct the API links in hdfs forrest doc so that they
|
||||
point to the same version of hadoop. (szetszwo)
|
||||
|
||||
HADOOP-5883. Fixed tasktracker memory monitoring to account for
|
||||
momentary spurts in memory usage due to java's fork() model.
|
||||
(yhemanth)
|
||||
|
||||
HADOOP-5539. Fixes a problem to do with not preserving intermediate
|
||||
output compression for merged data.
|
||||
(Jothi Padmanabhan and Billy Pearson via ddas)
|
||||
|
||||
HADOOP-5932. Fixes a problem in capacity scheduler in computing
|
||||
available memory on a tasktracker.
|
||||
(Vinod Kumar Vavilapalli via yhemanth)
|
||||
|
||||
HADOOP-5648. Fixes a build issue in not being able to generate gridmix.jar
|
||||
in hadoop binary tarball. (Giridharan Kesavan via gkesavan)
|
||||
|
||||
HADOOP-5908. Fixes a problem to do with ArithmeticException in the
|
||||
JobTracker when there are jobs with 0 maps. (Amar Kamat via ddas)
|
||||
|
||||
HADOOP-5924. Fixes a corner case problem to do with job recovery with
|
||||
empty history files. Also, after a JT restart, sends KillTaskAction to
|
||||
tasks that report back but the corresponding job hasn't been initialized
|
||||
yet. (Amar Kamat via ddas)
|
||||
|
||||
HADOOP-5882. Fixes a reducer progress update problem for new mapreduce
|
||||
api. (Amareshwari Sriramadasu via sharad)
|
||||
|
||||
HADOOP-5746. Fixes a corner case problem in Streaming, where if an
|
||||
exception happens in MROutputThread after the last call to the map/reduce
|
||||
method, the exception goes undetected. (Amar Kamat via ddas)
|
||||
|
||||
HADOOP-5884. Fixes accounting in capacity scheduler so that high RAM jobs
|
||||
take more slots. (Vinod Kumar Vavilapalli via yhemanth)
|
||||
|
||||
HADOOP-5937. Correct a safemode message in FSNamesystem. (Ravi Phulari
|
||||
via szetszwo)
|
||||
|
||||
HADOOP-5869. Fix bug in assignment of setup / cleanup task that was
|
||||
causing TestQueueCapacities to fail.
|
||||
(Sreekanth Ramakrishnan via yhemanth)
|
||||
|
||||
HADOOP-5921. Fixes a problem in the JobTracker where it sometimes never
|
||||
used to come up due to a system file creation on JobTracker's system-dir
|
||||
failing. This problem would sometimes show up only when the FS for the
|
||||
system-dir (usually HDFS) is started at nearly the same time as the
|
||||
JobTracker. (Amar Kamat via ddas)
|
||||
|
||||
HADOOP-5920. Fixes a testcase failure for TestJobHistory.
|
||||
(Amar Kamat via ddas)
|
||||
|
||||
HDFS-26. Better error message to users when commands fail because of
|
||||
lack of quota. Allow quota to be set even if the limit is lower than
|
||||
current consumption. (Boris Shkolnik via rangadi)
|
||||
|
||||
MAPREDUCE-2. Fixes a bug in KeyFieldBasedPartitioner in handling empty
|
||||
keys. (Amar Kamat via sharad)
|
||||
|
||||
MAPREDUCE-130. Delete the jobconf copy from the log directory of the
|
||||
JobTracker when the job is retired. (Amar Kamat via sharad)
|
||||
|
||||
MAPREDUCE-657. Fix hardcoded filesystem problem in CompletedJobStatusStore.
|
||||
(Amar Kamat via sharad)
|
||||
|
||||
MAPREDUCE-179. Update progress in new RecordReaders. (cdouglas)
|
||||
|
||||
MAPREDUCE-124. Fix a bug in failure handling of abort task of
|
||||
OutputCommitter. (Amareshwari Sriramadasu via sharad)
|
||||
|
||||
HADOOP-6139. Fix the FsShell help messages for rm and rmr. (Jakob Homan
|
||||
via szetszwo)
|
||||
|
||||
HADOOP-6141. Fix a few bugs in 0.20 test-patch.sh. (Hong Tang via
|
||||
szetszwo)
|
||||
|
||||
HADOOP-6145. Fix FsShell rm/rmr error messages when there is a FNFE.
|
||||
(Jakob Homan via szetszwo)
|
||||
|
||||
MAPREDUCE-565. Fix partitioner to work with new API. (Owen O'Malley via
|
||||
cdouglas)
|
||||
|
||||
MAPREDUCE-465. Fix a bug in MultithreadedMapRunner. (Amareshwari
|
||||
Sriramadasu via sharad)
|
||||
|
||||
MAPREDUCE-18. Puts some checks to detect cases where jetty serves up
|
||||
incorrect output during shuffle. (Ravi Gummadi via ddas)
|
||||
|
||||
MAPREDUCE-735. Fixes a problem in the KeyFieldHelper to do with
|
||||
the end index for some inputs (Amar Kamat via ddas)
|
||||
|
||||
HADOOP-6150. Users should be able to instantiate comparator using TFile
|
||||
API. (Hong Tang via rangadi)
|
||||
|
||||
MAPREDUCE-383. Fix a bug in Pipes combiner due to bytes count not
|
||||
getting reset after the spill. (Christian Kunz via sharad)
|
||||
|
||||
MAPREDUCE-40. Keep memory management backwards compatible for job
|
||||
configuration parameters and limits. (Rahul Kumar Singh via yhemanth)
|
||||
|
||||
MAPREDUCE-796. Fixes a ClassCastException in an exception log in
|
||||
MultiThreadedMapRunner. (Amar Kamat via ddas)
|
||||
|
||||
MAPREDUCE-838. Fixes a problem in the way commit of task outputs
|
||||
happens. The bug was that even if commit failed, the task would
|
||||
be declared as successful. (Amareshwari Sriramadasu via ddas)
|
||||
|
||||
MAPREDUCE-805. Fixes some deadlocks in the JobTracker due to the fact
|
||||
the JobTracker lock hierarchy wasn't maintained in some JobInProgress
|
||||
method calls. (Amar Kamat via ddas)
|
||||
|
||||
HDFS-167. Fix a bug in DFSClient that caused infinite retries on write.
|
||||
(Bill Zeller via szetszwo)
|
||||
|
||||
HDFS-527. Remove unnecessary DFSClient constructors. (szetszwo)
|
||||
|
||||
MAPREDUCE-832. Reduce number of warning messages printed when
|
||||
deprecated memory variables are used. (Rahul Kumar Singh via yhemanth)
|
||||
|
||||
MAPREDUCE-745. Fixes a testcase problem to do with generation of JobTracker
|
||||
IDs. (Amar Kamat via ddas)
|
||||
|
||||
MAPREDUCE-834. Enables memory management on tasktrackers when old
|
||||
memory management parameters are used in configuration.
|
||||
(Sreekanth Ramakrishnan via yhemanth)
|
||||
|
||||
MAPREDUCE-818. Fixes Counters#getGroup API. (Amareshwari Sriramadasu
|
||||
via sharad)
|
||||
|
||||
MAPREDUCE-807. Handles the AccessControlException during the deletion of
|
||||
mapred.system.dir in the JobTracker. The JobTracker will bail out if it
|
||||
encounters such an exception. (Amar Kamat via ddas)
|
||||
|
||||
HADOOP-6213. Remove commons dependency on commons-cli2. (Amar Kamat via
|
||||
sharad)
|
||||
|
||||
MAPREDUCE-430. Fix a bug related to task getting stuck in case of
|
||||
OOM error. (Amar Kamat via ddas)
|
||||
|
||||
HADOOP-6215. fix GenericOptionParser to deal with -D with '=' in the
|
||||
value. (Amar Kamat via sharad)
|
||||
|
||||
MAPREDUCE-421. Fix Pipes to use returned system exit code.
|
||||
(Christian Kunz via omalley)
|
||||
|
||||
HDFS-525. The SimpleDateFormat object in ListPathsServlet is not thread
|
||||
safe. (Suresh Srinivas and cdouglas)
|
||||
|
||||
MAPREDUCE-911. Fix a bug in TestTaskFail related to speculative
|
||||
execution. (Amareshwari Sriramadasu via sharad)
|
||||
|
||||
MAPREDUCE-687. Fix an assertion in TestMiniMRMapRedDebugScript.
|
||||
(Amareshwari Sriramadasu via sharad)
|
||||
|
||||
MAPREDUCE-924. Fixes the TestPipes testcase to use Tool.
|
||||
(Amareshwari Sriramadasu via sharad)
|
||||
|
||||
Release 0.20.0 - 2009-04-15
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -361,6 +743,9 @@ Release 0.20.0 - 2009-04-15
|
|||
HADOOP-5521. Removes dependency of TestJobInProgress on RESTART_COUNT
|
||||
JobHistory tag. (Ravi Gummadi via ddas)
|
||||
|
||||
HADOOP-5714. Add a metric for NameNode getFileInfo operation. (Jakob Homan
|
||||
via szetszwo)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HADOOP-3293. Fixes FileInputFormat to provide locations for splits
|
||||
|
@ -945,6 +1330,18 @@ Release 0.19.2 - Unreleased
|
|||
HADOOP-5551. Prevent directory destruction on file create.
|
||||
(Brian Bockelman via shv)
|
||||
|
||||
HADOOP-5671. Fix FNF exceptions when copying from old versions of
|
||||
HftpFileSystem. (Tsz Wo (Nicholas), SZE via cdouglas)
|
||||
|
||||
HADOOP-5579. Set errno correctly in libhdfs for permission, quota, and FNF
|
||||
conditions. (Brian Bockelman via cdouglas)
|
||||
|
||||
HADOOP-5816. Fixes a problem in the KeyFieldBasedComparator to do with
|
||||
ArrayIndexOutOfBounds exception. (He Yongqiang via ddas)
|
||||
|
||||
HADOOP-5951. Add Apache license header to StorageInfo.java. (Suresh
|
||||
Srinivas via szetszwo)
|
||||
|
||||
Release 0.19.1 - 2009-02-23
|
||||
|
||||
IMPROVEMENTS
|
||||
|
@ -2035,6 +2432,12 @@ Release 0.18.4 - Unreleased
|
|||
HADOOP-5557. Fixes some minor problems in TestOverReplicatedBlocks.
|
||||
(szetszwo)
|
||||
|
||||
HADOOP-5644. Namenode is stuck in safe mode. (Suresh Srinivas via hairong)
|
||||
|
||||
HADOOP-6017. Lease Manager in NameNode does not handle certain characters
|
||||
in filenames. This results in fatal errors in Secondary NameNode and while
|
||||
restarting NameNode. (Tsz Wo (Nicholas), SZE via rangadi)
|
||||
|
||||
Release 0.18.3 - 2009-01-27
|
||||
|
||||
IMPROVEMENTS
|
|
@ -27,7 +27,7 @@
|
|||
|
||||
<property name="Name" value="Hadoop"/>
|
||||
<property name="name" value="hadoop"/>
|
||||
<property name="version" value="0.20.1-dev"/>
|
||||
<property name="version" value="0.20.3-dev"/>
|
||||
<property name="final.name" value="${name}-${version}"/>
|
||||
<property name="year" value="2009"/>
|
||||
|
||||
|
@ -137,7 +137,7 @@
|
|||
|
||||
<property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
|
||||
<property name="jdiff.xml.dir" value="${lib.dir}/jdiff"/>
|
||||
<property name="jdiff.stable" value="0.19.1"/>
|
||||
<property name="jdiff.stable" value="0.19.2"/>
|
||||
<property name="jdiff.stable.javadoc"
|
||||
value="http://hadoop.apache.org/core/docs/r${jdiff.stable}/api/"/>
|
||||
|
||||
|
@ -704,6 +704,7 @@
|
|||
<!-- ================================================================== -->
|
||||
<target name="test-core" depends="jar-test" description="Run core unit tests">
|
||||
|
||||
<delete file="${test.build.dir}/testsfailed"/>
|
||||
<delete dir="${test.build.data}"/>
|
||||
<mkdir dir="${test.build.data}"/>
|
||||
<delete dir="${test.log.dir}"/>
|
||||
|
@ -728,6 +729,10 @@
|
|||
<sysproperty key="java.library.path"
|
||||
value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
|
||||
<sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
|
||||
<!-- set io.compression.codec.lzo.class in the child jvm only if it is set -->
|
||||
<syspropertyset dynamic="no">
|
||||
<propertyref name="io.compression.codec.lzo.class"/>
|
||||
</syspropertyset>
|
||||
<!-- set compile.c++ in the child jvm only if it is set -->
|
||||
<syspropertyset dynamic="no">
|
||||
<propertyref name="compile.c++"/>
|
||||
|
@ -743,18 +748,30 @@
|
|||
<fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
|
||||
</batchtest>
|
||||
</junit>
|
||||
<fail if="tests.failed">Tests failed!</fail>
|
||||
<antcall target="checkfailure"/>
|
||||
</target>
|
||||
|
||||
<target name="checkfailure" if="tests.failed">
|
||||
<touch file="${test.build.dir}/testsfailed"/>
|
||||
<fail unless="continueOnFailure">Tests failed!</fail>
|
||||
</target>
|
||||
|
||||
<target name="test-contrib" depends="compile, compile-core-test" description="Run contrib unit tests">
|
||||
<subant target="test">
|
||||
<property name="version" value="${version}"/>
|
||||
<property name="clover.jar" value="${clover.jar}"/>
|
||||
<fileset file="${contrib.dir}/build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="test" depends="test-core, test-contrib" description="Run core, contrib unit tests">
|
||||
</target>
|
||||
<target name="test" description="Run core, contrib tests">
|
||||
<delete file="${test.build.dir}/testsfailed"/>
|
||||
<property name="continueOnFailure" value="true"/>
|
||||
<antcall target="test-core"/>
|
||||
<antcall target="test-contrib"/>
|
||||
<available file="${test.build.dir}/testsfailed" property="testsfailed"/>
|
||||
<fail if="testsfailed">Tests failed!</fail>
|
||||
</target>
|
||||
|
||||
<!-- Run all unit tests, not just Test*, and use non-test configuration. -->
|
||||
<target name="test-cluster" description="Run all unit tests, not just Test*, and use non-test configuration.">
|
||||
|
@ -1393,6 +1410,7 @@
|
|||
<target name="create-c++-utils-makefile" depends="check-c++-makefiles"
|
||||
if="need.c++.utils.makefile">
|
||||
<mkdir dir="${build.c++.utils}"/>
|
||||
<chmod file="${c++.utils.src}/configure" perm="ugo+x"/>
|
||||
<exec executable="${c++.utils.src}/configure" dir="${build.c++.utils}"
|
||||
failonerror="yes">
|
||||
<arg value="--prefix=${install.c++}"/>
|
||||
|
@ -1410,6 +1428,7 @@
|
|||
<target name="create-c++-pipes-makefile" depends="check-c++-makefiles"
|
||||
if="need.c++.pipes.makefile">
|
||||
<mkdir dir="${build.c++.pipes}"/>
|
||||
<chmod file="${c++.pipes.src}/configure" perm="ugo+x"/>
|
||||
<exec executable="${c++.pipes.src}/configure" dir="${build.c++.pipes}"
|
||||
failonerror="yes">
|
||||
<arg value="--prefix=${install.c++}"/>
|
||||
|
@ -1432,6 +1451,7 @@
|
|||
depends="check-c++-makefiles"
|
||||
if="need.c++.examples.pipes.makefile">
|
||||
<mkdir dir="${build.c++.examples.pipes}"/>
|
||||
<chmod file="${c++.examples.pipes.src}/configure" perm="ugo+x"/>
|
||||
<exec executable="${c++.examples.pipes.src}/configure"
|
||||
dir="${build.c++.examples.pipes}"
|
||||
failonerror="yes">
|
|
@ -8,21 +8,13 @@
|
|||
<configuration>
|
||||
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.queue.default.guaranteed-capacity</name>
|
||||
<name>mapred.capacity-scheduler.queue.default.capacity</name>
|
||||
<value>100</value>
|
||||
<description>Percentage of the number of slots in the cluster that are
|
||||
guaranteed to be available for jobs in this queue.
|
||||
to be available for jobs in this queue.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.queue.default.reclaim-time-limit</name>
|
||||
<value>300</value>
|
||||
<description>The amount of time, in seconds, before which
|
||||
resources distributed to other queues will be reclaimed.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.queue.default.supports-priority</name>
|
||||
<value>false</value>
|
||||
|
@ -54,28 +46,9 @@
|
|||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.reclaimCapacity.interval</name>
|
||||
<value>5</value>
|
||||
<description>The time interval, in seconds, between which the scheduler
|
||||
periodically determines whether capacity needs to be reclaimed for
|
||||
any queue.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- The default configuration settings for the capacity task scheduler -->
|
||||
<!-- The default values would be applied to all the queues which don't have -->
|
||||
<!-- the appropriate property for the particular queue -->
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.default-reclaim-time-limit</name>
|
||||
<value>300</value>
|
||||
<description>The amount of time, in seconds, before which
|
||||
resources distributed to other queues will be reclaimed by default
|
||||
in a job queue.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.default-supports-priority</name>
|
||||
<value>false</value>
|
||||
|
@ -83,37 +56,6 @@
|
|||
account in scheduling decisions by default in a job queue.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.task.default-pmem-percentage-in-vmem</name>
|
||||
<value>-1</value>
|
||||
<description>If mapred.task.maxpmem is set to -1, this configuration will
|
||||
be used to calculate job's physical memory requirements as a percentage of
|
||||
the job's virtual memory requirements set via mapred.task.maxvmem. This
|
||||
property thus provides default value of physical memory for job's that
|
||||
don't explicitly specify physical memory requirements.
|
||||
|
||||
If not explicitly set to a valid value, scheduler will not consider
|
||||
physical memory for scheduling even if virtual memory based scheduling is
|
||||
enabled(by setting valid values for both mapred.task.default.maxvmem and
|
||||
mapred.task.limit.maxvmem).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.task.limit.maxpmem</name>
|
||||
<value>-1</value>
|
||||
<description>Configuration that provides an upper limit on the maximum
|
||||
physical memory that can be specified by a job. The job configuration
|
||||
mapred.task.maxpmem should be less than this value. If not, the job will
|
||||
be rejected by the scheduler.
|
||||
|
||||
If it is set to -1, scheduler will not consider physical memory for
|
||||
scheduling even if virtual memory based scheduling is enabled(by setting
|
||||
valid values for both mapred.task.default.maxvmem and
|
||||
mapred.task.limit.maxvmem).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -80,10 +80,10 @@
|
|||
<dependencies>
|
||||
|
||||
<!--used client side-->
|
||||
<!-- <dependency org="commons-cli"
|
||||
name="commons-cli"
|
||||
rev="${commons-cli.version}"
|
||||
conf="client->default"/> -->
|
||||
<dependency org="commons-cli"
|
||||
name="commons-cli"
|
||||
rev="${commons-cli.version}"
|
||||
conf="client->default"/>
|
||||
|
||||
<dependency org="checkstyle"
|
||||
name="checkstyle"
|
||||
|
@ -256,6 +256,10 @@
|
|||
rev="${slf4j-log4j12.version}"
|
||||
conf="common->master">
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<dependency org="org.mockito"
|
||||
name="mockito-all"
|
||||
rev="${mockito-all.version}"
|
||||
conf="common->master"/>
|
||||
</dependencies>
|
||||
|
||||
</ivy-module>
|
|
@ -21,7 +21,7 @@ apacheant.version=1.7.0
|
|||
|
||||
checkstyle.version=4.2
|
||||
|
||||
commons-cli.version=2.0-SNAPSHOT
|
||||
commons-cli.version=1.2
|
||||
commons-codec.version=1.3
|
||||
commons-collections.version=3.1
|
||||
commons-httpclient.version=3.0.1
|
||||
|
@ -57,6 +57,8 @@ kfs.version=0.1
|
|||
log4j.version=1.2.15
|
||||
lucene-core.version=2.3.1
|
||||
|
||||
mockito-all.version=1.8.0
|
||||
|
||||
oro.version=2.0.8
|
||||
|
||||
rats-lib.version=0.5.1
|
BIN
core/lib/hadoop-0.20.2/lib/commons-cli-1.2.jar
Normal file
BIN
core/lib/hadoop-0.20.2/lib/commons-cli-1.2.jar
Normal file
Binary file not shown.
44204
core/lib/hadoop-0.20.2/lib/jdiff/hadoop_0.19.2.xml
Normal file
44204
core/lib/hadoop-0.20.2/lib/jdiff/hadoop_0.19.2.xml
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
53959
core/lib/hadoop-0.20.2/lib/jdiff/hadoop_0.20.2.xml
Normal file
53959
core/lib/hadoop-0.20.2/lib/jdiff/hadoop_0.20.2.xml
Normal file
File diff suppressed because it is too large
Load diff
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue