Merge branch 'master' into mos-shuffle-tracked

Conflicts:
	.gitignore
	core/src/main/scala/spark/LocalFileShuffle.scala
	src/scala/spark/BasicLocalFileShuffle.scala
	src/scala/spark/Broadcast.scala
	src/scala/spark/LocalFileShuffle.scala
This commit is contained in:
Mosharaf Chowdhury 2011-04-27 14:35:03 -07:00
commit ac7e066383
304 changed files with 103121 additions and 4660 deletions

23
.gitignore vendored
View file

@ -1,7 +1,26 @@
*~
*.swp
build
work
*.iml
.idea/
/build/
work/
out/
.DS_Store
third_party/libmesos.so
third_party/libmesos.dylib
conf/java-opts
conf/spark-env.sh
conf/log4j.properties
target/
reports/
.project
.classpath
.scala_dependencies
lib_managed/
src_managed/
project/boot/
project/plugins/project/build.properties
project/build/target/
project/plugins/target/
project/plugins/lib_managed/
project/plugins/src_managed/

27
LICENSE Normal file
View file

@ -0,0 +1,27 @@
Copyright (c) 2010, Regents of the University of California.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of California, Berkeley nor the
names of its contributors may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,79 +0,0 @@
# Build file for Spark: compiles the Scala and Java sources into build/classes,
# optionally packages them as jars, and copies configuration templates into
# place. Recipe lines below MUST start with a hard TAB.

# EMPTY/SPACE give us a literal single space to use as a $(subst) argument.
EMPTY =
SPACE = $(EMPTY) $(EMPTY)

# Build up classpath by concatenating some strings
JARS = third_party/mesos.jar
JARS += third_party/asm-3.2/lib/all/asm-all-3.2.jar
JARS += third_party/colt.jar
JARS += third_party/guava-r07/guava-r07.jar
JARS += third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
JARS += third_party/hadoop-0.20.0/lib/commons-logging-1.0.4.jar
JARS += third_party/scalatest-1.2/scalatest-1.2.jar
JARS += third_party/scalacheck_2.8.0-1.7.jar
JARS += third_party/jetty-7.1.6.v20100715/jetty-server-7.1.6.v20100715.jar
JARS += third_party/jetty-7.1.6.v20100715/servlet-api-2.5.jar
JARS += third_party/apache-log4j-1.2.16/log4j-1.2.16.jar
JARS += third_party/slf4j-1.6.1/slf4j-api-1.6.1.jar
JARS += third_party/slf4j-1.6.1/slf4j-log4j12-1.6.1.jar

# Replace the spaces between jar names with ':' to form the classpath.
CLASSPATH = $(subst $(SPACE),:,$(JARS))

# The Scala globs are deliberately left unexpanded here; the shell expands
# them when the compile recipe runs.
SCALA_SOURCES = src/examples/*.scala src/scala/spark/*.scala src/scala/spark/repl/*.scala
SCALA_SOURCES += src/test/spark/*.scala src/test/spark/repl/*.scala
JAVA_SOURCES = $(wildcard src/java/spark/compress/lzf/*.java)

# USE_FSC=1 selects the fsc compiler instead of scalac.
ifeq ($(USE_FSC),1)
COMPILER_NAME = fsc
else
COMPILER_NAME = scalac
endif

# Use the compiler under SCALA_HOME when it is set; otherwise rely on PATH.
ifeq ($(SCALA_HOME),)
COMPILER = $(COMPILER_NAME)
else
COMPILER = $(SCALA_HOME)/bin/$(COMPILER_NAME)
endif

CONF_FILES = conf/spark-env.sh conf/log4j.properties conf/java-opts

# First target in the file, hence the default goal.
all: scala java conf-files

build/classes:
	mkdir -p build/classes

# build/classes is put first on the classpath so the Scala code can see the
# classes produced by the java target.
scala: build/classes java
	$(COMPILER) -d build/classes -classpath build/classes:$(CLASSPATH) $(SCALA_SOURCES)

java: $(JAVA_SOURCES) build/classes
	javac -d build/classes $(JAVA_SOURCES)

native: java
	$(MAKE) -C src/native

jar: build/spark.jar build/spark-dep.jar

dep-jar: build/spark-dep.jar

build/spark.jar: scala java
	jar cf build/spark.jar -C build/classes spark

# Unpack every third-party jar into build/dep and re-pack the result as a
# single jar. $$i is escaped so the shell, not make, expands the loop variable.
build/spark-dep.jar:
	mkdir -p build/dep
	cd build/dep && for i in $(JARS); do jar xf ../../$$i; done
	jar cf build/spark-dep.jar -C build/dep .

conf-files: $(CONF_FILES)

# The template is an order-only prerequisite: a config file is created from
# its template only when missing, and is never overwritten just because the
# template became newer.
$(CONF_FILES): %: | %.template
	cp $@.template $@

test: all
	./alltests

default: all

clean:
	$(MAKE) -C src/native clean
	rm -rf build

# The special target must be spelled in upper case; a lower-case ".phony" is
# just an ordinary target and leaves every name below non-phony.
.PHONY: default all clean scala java native jar dep-jar conf-files test

28
README
View file

@ -1,24 +1,32 @@
ONLINE DOCUMENTATION
You can find the latest Spark documentation, including a programming guide,
on the project wiki at http://github.com/mesos/spark/wiki. This file only
contains basic setup instructions.
BUILDING
Spark requires Scala 2.8. This version has been tested with 2.8.0.final.
Spark requires Scala 2.8. This version has been tested with 2.8.1.final.
To build and run Spark, you will need to have Scala's bin in your $PATH,
or you will need to set the SCALA_HOME environment variable to point
to where you've installed Scala. Scala must be accessible through one
of these methods on Mesos slave nodes as well as on the master.
The project is built using Simple Build Tool (SBT), which is packaged with it.
To build Spark and its example programs, run sbt/sbt compile.
To build Spark and the example programs, run make.
To run Spark, you will need to have Scala's bin in your $PATH, or you
will need to set the SCALA_HOME environment variable to point to where
you've installed Scala. Scala must be accessible through one of these
methods on Mesos slave nodes as well as on the master.
To run one of the examples, use ./run <class> <params>. For example,
./run SparkLR will run the Logistic Regression example. Each of the
example programs prints usage help if no params are given.
./run spark.examples.SparkLR will run the Logistic Regression example.
Each of the example programs prints usage help if no params are given.
All of the Spark samples take a <host> parameter that is the Mesos master
to connect to. This can be a Mesos URL, or "local" to run locally with one
thread, or "local[N]" to run locally with N threads.
Tip: If you are building Spark and examples repeatedly, export USE_FSC=1
to have the Makefile use the fsc compiler daemon instead of scalac.
CONFIGURATION

View file

@ -1,11 +0,0 @@
#!/bin/bash
# Runs org.scalatest.tools.Runner through the sibling `run` script against
# build/classes, writing results into a freshly recreated
# build/test_results directory. Any arguments given to this script are
# appended after the Runner's -o option.

# Directory containing this script; quoted so paths with spaces survive.
FWDIR="$(dirname "$0")"

# Default SPARK_MEM to 500m when the caller has not set it (or set it empty).
if [ -z "$SPARK_MEM" ]; then
  export SPARK_MEM=500m
fi

RESULTS_DIR="$FWDIR/build/test_results"

# Start from an empty results directory so stale reports are not mixed in.
if [ -d "$RESULTS_DIR" ]; then
  rm -r "$RESULTS_DIR"
fi
mkdir -p "$RESULTS_DIR"

# "$@" (quoted) forwards each caller argument as a single word.
"$FWDIR/run" org.scalatest.tools.Runner -p "$FWDIR/build/classes" -u "$RESULTS_DIR" -o "$@"

View file

@ -1,4 +1,4 @@
-Dspark.shuffle.class=spark.TrackedCustomBlockedInMemoryShuffle
-Dspark.shuffle.class=spark.CustomBlockedInMemoryShuffle
-Dspark.shuffle.masterHostAddress=127.0.0.1
-Dspark.shuffle.masterTrackerPort=22222
-Dspark.shuffle.trackerStrategy=spark.BalanceRemainingShuffleTrackerStrategy

View file

@ -10,4 +10,4 @@
# be in the same format as the JVM's -Xmx option, e.g. 300m or 1g).
# - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.
MESOS_HOME=/home/mosharaf/Work/mesos
MESOS_HOME=/Users/mosharaf/Work/mesos

View file

@ -0,0 +1,11 @@
Copyright 2009-2010 Ning, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License.

Binary file not shown.

View file

@ -1,5 +1,387 @@
Hadoop Change Log
Release 0.20.3 - Unreleased
Release 0.20.2 - 2010-2-19
NEW FEATURES
HADOOP-6218. Adds a feature where TFile can be split by Record
Sequence number. (Hong Tang and Raghu Angadi via ddas)
BUG FIXES
MAPREDUCE-112. Add counters for reduce input, output records to the new API.
(Jothi Padmanabhan via cdouglas)
HADOOP-6231. Allow caching of filesystem instances to be disabled on a
per-instance basis (Tom White and Ben Slusky via mahadev)
MAPREDUCE-826. harchive doesn't use ToolRunner / harchive returns 0 even
if the job fails with exception (koji via mahadev)
MAPREDUCE-979. Fixed JobConf APIs related to memory parameters to return
values of new configuration variables when deprecated variables are
disabled. (Sreekanth Ramakrishnan via yhemanth)
HDFS-686. NullPointerException is thrown while merging edit log and image.
(hairong)
HDFS-677. Rename failure when both source and destination quota exceeds
results in deletion of source. (suresh)
HDFS-709. Fix TestDFSShell failure due to rename bug introduced by
HDFS-677. (suresh)
HDFS-579. Fix DfsTask to follow the semantics of 0.19, regarding non-zero
return values as failures. (Christian Kunz via cdouglas)
MAPREDUCE-1070. Prevent a deadlock in the fair scheduler servlet.
(Todd Lipcon via cdouglas)
HADOOP-5759. Fix for IllegalArgumentException when CombineFileInputFormat
is used as job InputFormat. (Amareshwari Sriramadasu via zshao)
HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte
count at the start of each block in Hadoop archives. (Ben Slusky, Tom
White, and Mahadev Konar via cdouglas)
HDFS-723. Fix deadlock in DFSClient#DFSOutputStream. (hairong)
HDFS-732. DFSClient.DFSOutputStream.close() should throw an exception if
the stream cannot be closed successfully. (szetszwo)
MAPREDUCE-1163. Remove unused, hard-coded paths from libhdfs. (Allen
Wittenauer via cdouglas)
HDFS-761. Fix failure to process rename operation from edits log due to
quota verification. (suresh)
MAPREDUCE-623. Resolve javac warnings in mapreduce. (Jothi Padmanabhan
via sharad)
HADOOP-6575. Remove call to fault injection tests not present in 0.20.
(cdouglas)
HADOOP-6576. Fix streaming test failures on 0.20. (Todd Lipcon via cdouglas)
IMPROVEMENTS
HADOOP-5611. Fix C++ libraries to build on Debian Lenny. (Todd Lipcon
via tomwhite)
MAPREDUCE-1068. Fix streaming job to show proper message if file is
not present. (Amareshwari Sriramadasu via sharad)
HDFS-596. Fix memory leak in hdfsFreeFileInfo() for libhdfs.
(Zhang Bingjun via dhruba)
MAPREDUCE-1147. Add map output counters to new API. (Amar Kamat via
cdouglas)
HADOOP-6269. Fix threading issue with defaultResource in Configuration.
(Sreekanth Ramakrishnan via cdouglas)
MAPREDUCE-1182. Fix overflow in reduce causing allocations to exceed the
configured threshold. (cdouglas)
HADOOP-6386. NameNode's HttpServer can't instantiate InetSocketAddress:
IllegalArgumentException is thrown. (cos)
HDFS-185. Disallow chown, chgrp, chmod, setQuota, and setSpaceQuota when
name-node is in safemode. (Ravi Phulari via shv)
HADOOP-6428. HttpServer sleeps with negative values (cos)
HADOOP-5623. Fixes a problem to do with status messages getting overwritten
in streaming jobs. (Rick Cox and Jothi Padmanabhan via tomwhite)
HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
GzipCodec. (Aaron Kimball via cdouglas)
HDFS-187. Initialize secondary namenode http address in TestStartup.
(Todd Lipcon via szetszwo)
MAPREDUCE-433. Use more reliable counters in TestReduceFetch. (cdouglas)
HDFS-792. DFSClient 0.20.1 is incompatible with HDFS 0.20.2.
(Todd Lipcon via hairong)
HADOOP-6498. IPC client bug may cause rpc call hang. (Ruyue Ma and
hairong via hairong)
HADOOP-6596. Failing tests prevent the rest of test targets from
execution. (cos)
HADOOP-6524. Contrib tests are failing Clover'ed build. (cos)
HDFS-919. Create test to validate the BlocksVerified metric (Gary Murry
via cos)
HDFS-907. Add tests for getBlockLocations and totalLoad metrics.
(Ravi Phulari via cos)
MAPREDUCE-1251. c++ utils doesn't compile. (Eli Collins via tomwhite)
HADOOP-5612. Some c++ scripts are not chmodded before ant execution.
(Todd Lipcon via tomwhite)
Release 0.20.1 - 2009-09-01
INCOMPATIBLE CHANGES
HADOOP-5726. Remove pre-emption from capacity scheduler code base.
(Rahul Kumar Singh via yhemanth)
HADOOP-5881. Simplify memory monitoring and scheduling related
configuration. (Vinod Kumar Vavilapalli via yhemanth)
NEW FEATURES
HADOOP-6080. Introduce -skipTrash option to rm and rmr.
(Jakob Homan via shv)
HADOOP-3315. Add a new, binary file format, TFile. (Hong Tang via cdouglas)
IMPROVEMENTS
HADOOP-5711. Change Namenode file close log to info. (szetszwo)
HADOOP-5736. Update the capacity scheduler documentation for features
like memory based scheduling, job initialization and removal of pre-emption.
(Sreekanth Ramakrishnan via yhemanth)
HADOOP-4674. Fix fs help messages for -test, -text, -tail, -stat
and -touchz options. (Ravi Phulari via szetszwo)
HADOOP-4372. Improves the way history filenames are obtained and manipulated.
(Amar Kamat via ddas)
HADOOP-5897. Add name-node metrics to capture java heap usage.
(Suresh Srinivas via shv)
HDFS-438. Improve help message for space quota command. (Raghu Angadi)
MAPREDUCE-767. Remove the dependence on the CLI 2.0 snapshot.
(Amar Kamat via ddas)
OPTIMIZATIONS
BUG FIXES
HADOOP-5691. Makes org.apache.hadoop.mapreduce.Reducer concrete class
instead of abstract. (Amareshwari Sriramadasu via sharad)
HADOOP-5646. Fixes a problem in TestQueueCapacities.
(Vinod Kumar Vavilapalli via ddas)
HADOOP-5655. TestMRServerPorts fails on java.net.BindException. (Devaraj
Das via hairong)
HADOOP-5654. TestReplicationPolicy.<init> fails on java.net.BindException.
(hairong)
HADOOP-5688. Fix HftpFileSystem checksum path construction. (Tsz Wo
(Nicholas) Sze via cdouglas)
HADOOP-5213. Fix Null pointer exception caused when bzip2compression
was used and user closed a output stream without writing any data.
(Zheng Shao via dhruba)
HADOOP-5718. Remove the check for the default queue in capacity scheduler.
(Sreekanth Ramakrishnan via yhemanth)
HADOOP-5719. Remove jobs that failed initialization from the waiting queue
in the capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
HADOOP-4744. Attaching another fix to the jetty port issue. The TaskTracker
kills itself if it ever discovers that the port to which jetty is actually
bound is invalid (-1). (ddas)
HADOOP-5349. Fixes a problem in LocalDirAllocator to check for the return
path value that is returned for the case where the file we want to write
is of an unknown size. (Vinod Kumar Vavilapalli via ddas)
HADOOP-5636. Prevents a job from going to RUNNING state after it has been
KILLED (this used to happen when the SetupTask would come back with a
success after the job has been killed). (Amar Kamat via ddas)
HADOOP-5641. Fix a NullPointerException in capacity scheduler's memory
based scheduling code when jobs get retired. (yhemanth)
HADOOP-5828. Use absolute path for mapred.local.dir of JobTracker in
MiniMRCluster. (yhemanth)
HADOOP-4981. Fix capacity scheduler to schedule speculative tasks
correctly in the presence of High RAM jobs.
(Sreekanth Ramakrishnan via yhemanth)
HADOOP-5210. Solves a problem in the progress report of the reduce task.
(Ravi Gummadi via ddas)
HADOOP-5850. Fixes a problem to do with not being able to run jobs with
0 maps/reduces. (Vinod K V via ddas)
HADOOP-5728. Fixed FSEditLog.printStatistics IndexOutOfBoundsException.
(Wang Xu via johan)
HADOOP-4626. Correct the API links in hdfs forrest doc so that they
point to the same version of hadoop. (szetszwo)
HADOOP-5883. Fixed tasktracker memory monitoring to account for
momentary spurts in memory usage due to java's fork() model.
(yhemanth)
HADOOP-5539. Fixes a problem to do with not preserving intermediate
output compression for merged data.
(Jothi Padmanabhan and Billy Pearson via ddas)
HADOOP-5932. Fixes a problem in capacity scheduler in computing
available memory on a tasktracker.
(Vinod Kumar Vavilapalli via yhemanth)
HADOOP-5648. Fixes a build issue in not being able to generate gridmix.jar
in hadoop binary tarball. (Giridharan Kesavan via gkesavan)
HADOOP-5908. Fixes a problem to do with ArithmeticException in the
JobTracker when there are jobs with 0 maps. (Amar Kamat via ddas)
HADOOP-5924. Fixes a corner case problem to do with job recovery with
empty history files. Also, after a JT restart, sends KillTaskAction to
tasks that report back but the corresponding job hasn't been initialized
yet. (Amar Kamat via ddas)
HADOOP-5882. Fixes a reducer progress update problem for new mapreduce
api. (Amareshwari Sriramadasu via sharad)
HADOOP-5746. Fixes a corner case problem in Streaming, where if an
exception happens in MROutputThread after the last call to the map/reduce
method, the exception goes undetected. (Amar Kamat via ddas)
HADOOP-5884. Fixes accounting in capacity scheduler so that high RAM jobs
take more slots. (Vinod Kumar Vavilapalli via yhemanth)
HADOOP-5937. Correct a safemode message in FSNamesystem. (Ravi Phulari
via szetszwo)
HADOOP-5869. Fix bug in assignment of setup / cleanup task that was
causing TestQueueCapacities to fail.
(Sreekanth Ramakrishnan via yhemanth)
HADOOP-5921. Fixes a problem in the JobTracker where it sometimes never
used to come up due to a system file creation on JobTracker's system-dir
failing. This problem would sometimes show up only when the FS for the
system-dir (usually HDFS) is started at nearly the same time as the
JobTracker. (Amar Kamat via ddas)
HADOOP-5920. Fixes a testcase failure for TestJobHistory.
(Amar Kamat via ddas)
HDFS-26. Better error message to users when commands fail because of
lack of quota. Allow quota to be set even if the limit is lower than
current consumption. (Boris Shkolnik via rangadi)
MAPREDUCE-2. Fixes a bug in KeyFieldBasedPartitioner in handling empty
keys. (Amar Kamat via sharad)
MAPREDUCE-130. Delete the jobconf copy from the log directory of the
JobTracker when the job is retired. (Amar Kamat via sharad)
MAPREDUCE-657. Fix hardcoded filesystem problem in CompletedJobStatusStore.
(Amar Kamat via sharad)
MAPREDUCE-179. Update progress in new RecordReaders. (cdouglas)
MAPREDUCE-124. Fix a bug in failure handling of abort task of
OutputCommiter. (Amareshwari Sriramadasu via sharad)
HADOOP-6139. Fix the FsShell help messages for rm and rmr. (Jakob Homan
via szetszwo)
HADOOP-6141. Fix a few bugs in 0.20 test-patch.sh. (Hong Tang via
szetszwo)
HADOOP-6145. Fix FsShell rm/rmr error messages when there is a FNFE.
(Jakob Homan via szetszwo)
MAPREDUCE-565. Fix partitioner to work with new API. (Owen O'Malley via
cdouglas)
MAPREDUCE-465. Fix a bug in MultithreadedMapRunner. (Amareshwari
Sriramadasu via sharad)
MAPREDUCE-18. Puts some checks to detect cases where jetty serves up
incorrect output during shuffle. (Ravi Gummadi via ddas)
MAPREDUCE-735. Fixes a problem in the KeyFieldHelper to do with
the end index for some inputs (Amar Kamat via ddas)
HADOOP-6150. Users should be able to instantiate comparator using TFile
API. (Hong Tang via rangadi)
MAPREDUCE-383. Fix a bug in Pipes combiner due to bytes count not
getting reset after the spill. (Christian Kunz via sharad)
MAPREDUCE-40. Keep memory management backwards compatible for job
configuration parameters and limits. (Rahul Kumar Singh via yhemanth)
MAPREDUCE-796. Fixes a ClassCastException in an exception log in
MultiThreadedMapRunner. (Amar Kamat via ddas)
MAPREDUCE-838. Fixes a problem in the way commit of task outputs
happens. The bug was that even if commit failed, the task would
be declared as successful. (Amareshwari Sriramadasu via ddas)
MAPREDUCE-805. Fixes some deadlocks in the JobTracker due to the fact
the JobTracker lock hierarchy wasn't maintained in some JobInProgress
method calls. (Amar Kamat via ddas)
HDFS-167. Fix a bug in DFSClient that caused infinite retries on write.
(Bill Zeller via szetszwo)
HDFS-527. Remove unnecessary DFSClient constructors. (szetszwo)
MAPREDUCE-832. Reduce number of warning messages printed when
deprecated memory variables are used. (Rahul Kumar Singh via yhemanth)
MAPREDUCE-745. Fixes a testcase problem to do with generation of JobTracker
IDs. (Amar Kamat via ddas)
MAPREDUCE-834. Enables memory management on tasktrackers when old
memory management parameters are used in configuration.
(Sreekanth Ramakrishnan via yhemanth)
MAPREDUCE-818. Fixes Counters#getGroup API. (Amareshwari Sriramadasu
via sharad)
MAPREDUCE-807. Handles the AccessControlException during the deletion of
mapred.system.dir in the JobTracker. The JobTracker will bail out if it
encounters such an exception. (Amar Kamat via ddas)
HADOOP-6213. Remove commons dependency on commons-cli2. (Amar Kamat via
sharad)
MAPREDUCE-430. Fix a bug related to task getting stuck in case of
OOM error. (Amar Kamat via ddas)
HADOOP-6215. fix GenericOptionParser to deal with -D with '=' in the
value. (Amar Kamat via sharad)
MAPREDUCE-421. Fix Pipes to use returned system exit code.
(Christian Kunz via omalley)
HDFS-525. The SimpleDateFormat object in ListPathsServlet is not thread
safe. (Suresh Srinivas and cdouglas)
MAPREDUCE-911. Fix a bug in TestTaskFail related to speculative
execution. (Amareshwari Sriramadasu via sharad)
MAPREDUCE-687. Fix an assertion in TestMiniMRMapRedDebugScript.
(Amareshwari Sriramadasu via sharad)
MAPREDUCE-924. Fixes the TestPipes testcase to use Tool.
(Amareshwari Sriramadasu via sharad)
Release 0.20.0 - 2009-04-15
INCOMPATIBLE CHANGES
@ -361,6 +743,9 @@ Release 0.20.0 - 2009-04-15
HADOOP-5521. Removes dependency of TestJobInProgress on RESTART_COUNT
JobHistory tag. (Ravi Gummadi via ddas)
HADOOP-5714. Add a metric for NameNode getFileInfo operation. (Jakob Homan
via szetszwo)
OPTIMIZATIONS
HADOOP-3293. Fixes FileInputFormat to do provide locations for splits
@ -945,6 +1330,18 @@ Release 0.19.2 - Unreleased
HADOOP-5551. Prevent directory destruction on file create.
(Brian Bockelman via shv)
HADOOP-5671. Fix FNF exceptions when copying from old versions of
HftpFileSystem. (Tsz Wo (Nicholas), SZE via cdouglas)
HADOOP-5579. Set errno correctly in libhdfs for permission, quota, and FNF
conditions. (Brian Bockelman via cdouglas)
HADOOP-5816. Fixes a problem in the KeyFieldBasedComparator to do with
ArrayIndexOutOfBounds exception. (He Yongqiang via ddas)
HADOOP-5951. Add Apache license header to StorageInfo.java. (Suresh
Srinivas via szetszwo)
Release 0.19.1 - 2009-02-23
IMPROVEMENTS
@ -2035,6 +2432,12 @@ Release 0.18.4 - Unreleased
HADOOP-5557. Fixes some minor problems in TestOverReplicatedBlocks.
(szetszwo)
HADOOP-5644. Namenode is stuck in safe mode. (suresh Srinivas via hairong)
HADOOP-6017. Lease Manager in NameNode does not handle certain characters
in filenames. This results in fatal errors in Secondary NameNode and while
restarting NameNode. (Tsz Wo (Nicholas), SZE via rangadi)
Release 0.18.3 - 2009-01-27
IMPROVEMENTS

View file

@ -27,7 +27,7 @@
<property name="Name" value="Hadoop"/>
<property name="name" value="hadoop"/>
<property name="version" value="0.20.1-dev"/>
<property name="version" value="0.20.3-dev"/>
<property name="final.name" value="${name}-${version}"/>
<property name="year" value="2009"/>
@ -137,7 +137,7 @@
<property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
<property name="jdiff.xml.dir" value="${lib.dir}/jdiff"/>
<property name="jdiff.stable" value="0.19.1"/>
<property name="jdiff.stable" value="0.19.2"/>
<property name="jdiff.stable.javadoc"
value="http://hadoop.apache.org/core/docs/r${jdiff.stable}/api/"/>
@ -704,6 +704,7 @@
<!-- ================================================================== -->
<target name="test-core" depends="jar-test" description="Run core unit tests">
<delete file="${test.build.dir}/testsfailed"/>
<delete dir="${test.build.data}"/>
<mkdir dir="${test.build.data}"/>
<delete dir="${test.log.dir}"/>
@ -728,6 +729,10 @@
<sysproperty key="java.library.path"
value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
<sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
<!-- set io.compression.codec.lzo.class in the child jvm only if it is set -->
<syspropertyset dynamic="no">
<propertyref name="io.compression.codec.lzo.class"/>
</syspropertyset>
<!-- set compile.c++ in the child jvm only if it is set -->
<syspropertyset dynamic="no">
<propertyref name="compile.c++"/>
@ -743,18 +748,30 @@
<fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
</batchtest>
</junit>
<fail if="tests.failed">Tests failed!</fail>
<antcall target="checkfailure"/>
</target>
<target name="checkfailure" if="tests.failed">
<touch file="${test.build.dir}/testsfailed"/>
<fail unless="continueOnFailure">Tests failed!</fail>
</target>
<target name="test-contrib" depends="compile, compile-core-test" description="Run contrib unit tests">
<subant target="test">
<property name="version" value="${version}"/>
<property name="clover.jar" value="${clover.jar}"/>
<fileset file="${contrib.dir}/build.xml"/>
</subant>
</target>
<target name="test" depends="test-core, test-contrib" description="Run core, contrib unit tests">
</target>
<target name="test" description="Run core, contrib tests">
<delete file="${test.build.dir}/testsfailed"/>
<property name="continueOnFailure" value="true"/>
<antcall target="test-core"/>
<antcall target="test-contrib"/>
<available file="${test.build.dir}/testsfailed" property="testsfailed"/>
<fail if="testsfailed">Tests failed!</fail>
</target>
<!-- Run all unit tests, not just Test*, and use non-test configuration. -->
<target name="test-cluster" description="Run all unit tests, not just Test*, and use non-test configuration.">
@ -1393,6 +1410,7 @@
<target name="create-c++-utils-makefile" depends="check-c++-makefiles"
if="need.c++.utils.makefile">
<mkdir dir="${build.c++.utils}"/>
<chmod file="${c++.utils.src}/configure" perm="ugo+x"/>
<exec executable="${c++.utils.src}/configure" dir="${build.c++.utils}"
failonerror="yes">
<arg value="--prefix=${install.c++}"/>
@ -1410,6 +1428,7 @@
<target name="create-c++-pipes-makefile" depends="check-c++-makefiles"
if="need.c++.pipes.makefile">
<mkdir dir="${build.c++.pipes}"/>
<chmod file="${c++.pipes.src}/configure" perm="ugo+x"/>
<exec executable="${c++.pipes.src}/configure" dir="${build.c++.pipes}"
failonerror="yes">
<arg value="--prefix=${install.c++}"/>
@ -1432,6 +1451,7 @@
depends="check-c++-makefiles"
if="need.c++.examples.pipes.makefile">
<mkdir dir="${build.c++.examples.pipes}"/>
<chmod file="${c++.examples.pipes.src}/configure" perm="ugo+x"/>
<exec executable="${c++.examples.pipes.src}/configure"
dir="${build.c++.examples.pipes}"
failonerror="yes">

View file

@ -8,21 +8,13 @@
<configuration>
<property>
<name>mapred.capacity-scheduler.queue.default.guaranteed-capacity</name>
<name>mapred.capacity-scheduler.queue.default.capacity</name>
<value>100</value>
<description>Percentage of the number of slots in the cluster that are
guaranteed to be available for jobs in this queue.
to be available for jobs in this queue.
</description>
</property>
<property>
<name>mapred.capacity-scheduler.queue.default.reclaim-time-limit</name>
<value>300</value>
<description>The amount of time, in seconds, before which
resources distributed to other queues will be reclaimed.
</description>
</property>
<property>
<name>mapred.capacity-scheduler.queue.default.supports-priority</name>
<value>false</value>
@ -54,28 +46,9 @@
</description>
</property>
<property>
<name>mapred.capacity-scheduler.reclaimCapacity.interval</name>
<value>5</value>
<description>The time interval, in seconds, between which the scheduler
periodically determines whether capacity needs to be reclaimed for
any queue.
</description>
</property>
<!-- The default configuration settings for the capacity task scheduler -->
<!-- The default values would be applied to all the queues which don't have -->
<!-- the appropriate property for the particular queue -->
<property>
<name>mapred.capacity-scheduler.default-reclaim-time-limit</name>
<value>300</value>
<description>The amount of time, in seconds, before which
resources distributed to other queues will be reclaimed by default
in a job queue.
</description>
</property>
<property>
<name>mapred.capacity-scheduler.default-supports-priority</name>
<value>false</value>
@ -83,37 +56,6 @@
account in scheduling decisions by default in a job queue.
</description>
</property>
<property>
<name>mapred.capacity-scheduler.task.default-pmem-percentage-in-vmem</name>
<value>-1</value>
<description>If mapred.task.maxpmem is set to -1, this configuration will
be used to calculate job's physical memory requirements as a percentage of
the job's virtual memory requirements set via mapred.task.maxvmem. This
property thus provides default value of physical memory for job's that
don't explicitly specify physical memory requirements.
If not explicitly set to a valid value, scheduler will not consider
physical memory for scheduling even if virtual memory based scheduling is
enabled(by setting valid values for both mapred.task.default.maxvmem and
mapred.task.limit.maxvmem).
</description>
</property>
<property>
<name>mapred.capacity-scheduler.task.limit.maxpmem</name>
<value>-1</value>
<description>Configuration that provides an upper limit on the maximum
physical memory that can be specified by a job. The job configuration
mapred.task.maxpmem should be less than this value. If not, the job will
be rejected by the scheduler.
If it is set to -1, scheduler will not consider physical memory for
scheduling even if virtual memory based scheduling is enabled(by setting
valid values for both mapred.task.default.maxvmem and
mapred.task.limit.maxvmem).
</description>
</property>
<property>
<name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>

View file

@ -80,10 +80,10 @@
<dependencies>
<!--used client side-->
<!-- <dependency org="commons-cli"
name="commons-cli"
rev="${commons-cli.version}"
conf="client->default"/> -->
<dependency org="commons-cli"
name="commons-cli"
rev="${commons-cli.version}"
conf="client->default"/>
<dependency org="checkstyle"
name="checkstyle"
@ -256,6 +256,10 @@
rev="${slf4j-log4j12.version}"
conf="common->master">
</dependency>
</dependencies>
<dependency org="org.mockito"
name="mockito-all"
rev="${mockito-all.version}"
conf="common->master"/>
</dependencies>
</ivy-module>

View file

@ -21,7 +21,7 @@ apacheant.version=1.7.0
checkstyle.version=4.2
commons-cli.version=2.0-SNAPSHOT
commons-cli.version=1.2
commons-codec.version=1.3
commons-collections.version=3.1
commons-httpclient.version=3.0.1
@ -57,6 +57,8 @@ kfs.version=0.1
log4j.version=1.2.15
lucene-core.version=2.3.1
mockito-all.version=1.8.0
oro.version=2.0.8
rats-lib.version=0.5.1

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Some files were not shown because too many files have changed in this diff Show more