From 9ff8ad4bec5d2d750c314588d2bc75f32b8e7ff1 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 17 Apr 2013 11:26:51 -0700 Subject: [PATCH 001/136] REVERT ME: Linking to Patrick's ec2 repo for now --- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 9f2daad2b6..aa73363eea 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -386,7 +386,7 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k if not opts.old_scripts: # NOTE: We should clone the repository before running deploy_files to # prevent ec2-variables.sh from being overwritten - ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git") + ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/pwendell/spark-ec2.git -b ec2-updates") print "Deploying files to master..." deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, From 73f8cd237e929ce6b0d80d1a5726b8fc67d7095f Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 17 Apr 2013 18:09:57 -0700 Subject: [PATCH 002/136] Adding extra instance types --- ec2/spark_ec2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index aa73363eea..0ba5335bba 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -452,7 +452,9 @@ def get_num_disks(instance_type): "m2.4xlarge": 2, "cc1.4xlarge": 2, "cc2.8xlarge": 4, - "cg1.4xlarge": 2 + "cg1.4xlarge": 2, + "hs1.8xlarge": 24, + "cr1.8xlarge": 2 } if instance_type in disks_by_instance: return disks_by_instance[instance_type] From 03b58378d06903c425c0e6d7c5d6be604f133d92 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 17 Apr 2013 18:49:18 -0700 Subject: [PATCH 003/136] Adding mapreduce module --- ec2/spark_ec2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 0ba5335bba..42e4fe9442 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -376,9 +376,10 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k ssh(master, opts, 'chmod 600 ~/.ssh/id_rsa') if opts.cluster_type == "mesos": - modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mesos'] + modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mapreduce', 'mesos'] elif opts.cluster_type == "standalone": - modules = ['ephemeral-hdfs', 'persistent-hdfs', 'spark-standalone'] + modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mapreduce', + 'spark-standalone'] if opts.ganglia: modules.append('ganglia') From f8213aab4bd3056e0c5a86b6f6d0278162151499 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 18 Apr 2013 22:31:24 -0700 Subject: [PATCH 004/136] Adding automatic resolve of AMI --- ec2/spark_ec2.py | 59 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 42e4fe9442..e917fc8dc4 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -34,8 +34,8 @@ import boto from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType from boto import ec2 -# A static URL from which to figure out the latest Mesos EC2 AMI -LATEST_AMI_URL = "https://s3.amazonaws.com/mesos-images/ids/latest-spark-0.7" +# A URL prefix from which to fetch AMI information +AMI_PREFIX = "https://raw.github.com/pwendell/spark-ec2/ec2-updates/ami-list" # Configure and parse our command-line arguments @@ -156,6 +156,48 @@ def wait_for_instances(conn, instances): def is_active(instance): 
return (instance.state in ['pending', 'running', 'stopping', 'stopped']) +# Attempt to resolve an appropriate AMI given the architecture and +# region of the request. +def get_spark_ami(opts): + version_prefix = opts.ami + instance_types = { + "m1.small": "pvm", + "m1.medium": "pvm", + "m1.large": "pvm", + "m1.xlarge": "pvm", + "t1.micro": "pvm", + "c1.medium": "pvm", + "c1.xlarge": "pvm", + "m2.xlarge": "pvm", + "m2.2xlarge": "pvm", + "m2.4xlarge": "pvm", + "cc1.4xlarge": "hvm", + "cc2.8xlarge": "hvm", + "cg1.4xlarge": "hvm", + "hs1.8xlarge": "hvm", + "hi1.4xlarge": "hvm", + "m3.xlarge": "hvm", + "m3.2xlarge": "hvm", + "cr1.8xlarge": "hvm" + } + if opts.instance_type in instance_types: + instance_type = instance_types[opts.instance_type] + else: + instance_type = "pvm" + print >> stderr,\ + "Don't recognize %s, assuming type is pvm" % opts.instance_type + if version_prefix != "latest": + print >> stderr, \ + "Don't know how to resolve AMI for version: %s" % version_prefix + ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version_prefix, "us-east", instance_type) + try: + ami = urllib2.urlopen(ami_path).read().strip() + print "Spark AMI: " + ami + except: + print >> stderr, "Could not read " + ami_path + sys.exit(1) + + return ami # Launch a cluster of the given name, by setting up its security groups, # and then starting new instances in them. @@ -209,13 +251,7 @@ def launch_cluster(conn, opts, cluster_name): # Figure out the latest AMI from our static URL if opts.ami == "latest": - try: - opts.ami = urllib2.urlopen(LATEST_AMI_URL).read().strip() - print "Latest Spark AMI: " + opts.ami - except: - print >> stderr, "Could not read " + LATEST_AMI_URL - sys.exit(1) - + opts.ami = get_spark_ami(opts) print "Launching instances..." try: @@ -455,7 +491,10 @@ def get_num_disks(instance_type): "cc2.8xlarge": 4, "cg1.4xlarge": 2, "hs1.8xlarge": 24, - "cr1.8xlarge": 2 + "cr1.8xlarge": 2, + "hi1.4xlarge": 2, + "m3.xlarge": 0, + "m3.2xlarge": 0 } if instance_type in disks_by_instance: return disks_by_instance[instance_type] From 976e02491e3c0417c416d85b1f9e4f1c52b0200d Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 8 May 2013 17:18:21 -0700 Subject: [PATCH 005/136] Resolve AMI region automatically --- ec2/spark_ec2.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index e917fc8dc4..82ad98fbb3 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -189,12 +189,14 @@ def get_spark_ami(opts): if version_prefix != "latest": print >> stderr, \ "Don't know how to resolve AMI for version: %s" % version_prefix - ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version_prefix, "us-east", instance_type) + + region = "-".join(opts.region.split("-")[:2]) + ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version_prefix, region, instance_type) try: ami = urllib2.urlopen(ami_path).read().strip() print "Spark AMI: " + ami except: - print >> stderr, "Could not read " + ami_path + print >> stderr, "Could not resolve AMI at: " + ami_path sys.exit(1) return ami From f6c965a98b2852d4f9b3cb3c00216cf750ff9738 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 8 May 2013 21:42:08 -0700 Subject: [PATCH 006/136] Changing spark version and availability zone fix --- ec2/spark_ec2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 82ad98fbb3..c45e08809f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -64,7 +64,7 @@ def parse_args(): help="Availability zone to launch instances in, or 'all' to spread " + 
"slaves across multiple (an additional $0.01/Gb for bandwidth" + "between zones applies)") - parser.add_option("-a", "--ami", default="latest", + parser.add_option("-a", "--ami", default="0.7.0", help="Amazon Machine Image ID to use, or 'latest' to use latest " + "available AMI (default: latest)") parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", @@ -190,7 +190,8 @@ def get_spark_ami(opts): print >> stderr, \ "Don't know how to resolve AMI for version: %s" % version_prefix - region = "-".join(opts.region.split("-")[:2]) + parts = opts.region.split("-") + region = "-".join(parts[0], parts[1], parts[2][0]) # strip any avail. zone ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version_prefix, region, instance_type) try: ami = urllib2.urlopen(ami_path).read().strip() From 36c117a1f11252d6892c55f7f012745bf67b74df Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 8 May 2013 22:50:53 -0700 Subject: [PATCH 007/136] Slight change to AMI versioning --- ec2/spark_ec2.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index c45e08809f..a8b9b07507 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -64,9 +64,9 @@ def parse_args(): help="Availability zone to launch instances in, or 'all' to spread " + "slaves across multiple (an additional $0.01/Gb for bandwidth" + "between zones applies)") - parser.add_option("-a", "--ami", default="0.7.0", - help="Amazon Machine Image ID to use, or 'latest' to use latest " + - "available AMI (default: latest)") + parser.add_option("-a", "--ami", default="v0.7.0", + help="Amazon Machine Image ID to use, or 'vX.Y.Z' to use version " + + "X.Y.Z of Spark (default: v0.7.0)") parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + "the given local address (for use with login)") @@ -159,7 +159,7 @@ def is_active(instance): # Attempt to resolve an appropriate AMI given the architecture and # region of the request. def get_spark_ami(opts): - version_prefix = opts.ami + version = opts.ami instance_types = { "m1.small": "pvm", "m1.medium": "pvm", @@ -186,13 +186,15 @@ def get_spark_ami(opts): instance_type = "pvm" print >> stderr,\ "Don't recognize %s, assuming type is pvm" % opts.instance_type - if version_prefix != "latest": + if version != "v0.7.0": print >> stderr, \ - "Don't know how to resolve AMI for version: %s" % version_prefix - + "Don't know how to resolve AMI for version: %s" % version + # TODO(pwendell) Once we have multiple Spark AMI versions, we should let + # people give a version flag here in place of just saying 'latest'. + version = version[1:] parts = opts.region.split("-") - region = "-".join(parts[0], parts[1], parts[2][0]) # strip any avail. zone - ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version_prefix, region, instance_type) + region = "-".join([parts[0], parts[1], parts[2][0]]) # strip any avail. zone + ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version, region, instance_type) try: ami = urllib2.urlopen(ami_path).read().strip() print "Spark AMI: " + ami @@ -252,8 +254,8 @@ def launch_cluster(conn, opts, cluster_name): "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name)) sys.exit(1) - # Figure out the latest AMI from our static URL - if opts.ami == "latest": + # Figure out Spark AMI + if opts.ami[0] == "v": opts.ami = get_spark_ami(opts) print "Launching instances..." 
From 2197b2159a472296af30132a790b74f1442f19c1 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 9 May 2013 11:27:13 -0700 Subject: [PATCH 008/136] Removing unnecessary parsing --- ec2/spark_ec2.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index a8b9b07507..b4a5925679 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -192,9 +192,7 @@ def get_spark_ami(opts): # TODO(pwendell) Once we have multiple Spark AMI versions, we should let # people give a version flag here in place of just saying 'latest'. version = version[1:] - parts = opts.region.split("-") - region = "-".join([parts[0], parts[1], parts[2][0]]) # strip any avail. zone - ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version, region, instance_type) + ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version, opts.region, instance_type) try: ami = urllib2.urlopen(ami_path).read().strip() print "Spark AMI: " + ami From e8a1d029564c8f050a1b727ca23ae4829a9e0ed3 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 10 May 2013 15:13:22 -0700 Subject: [PATCH 009/136] Adding support for tag --- ec2/spark_ec2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index b4a5925679..d05b767cb8 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -64,9 +64,9 @@ def parse_args(): help="Availability zone to launch instances in, or 'all' to spread " + "slaves across multiple (an additional $0.01/Gb for bandwidth" + "between zones applies)") - parser.add_option("-a", "--ami", default="v0.7.0", - help="Amazon Machine Image ID to use, or 'vX.Y.Z' to use version " + - "X.Y.Z of Spark (default: v0.7.0)") + parser.add_option("-a", "--ami", default="latest", + help="Amazon Machine Image ID to use, 'vX.Y.Z' to use version " + + "X.Y.Z of Spark, or 'latest' to use latest AMI (default: latest)") parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + "the given local address (for use with login)") @@ -186,7 +186,7 @@ def get_spark_ami(opts): instance_type = "pvm" print >> stderr,\ "Don't recognize %s, assuming type is pvm" % opts.instance_type - if version != "v0.7.0": + if version not in ["latest", "v0.7.0"]: print >> stderr, \ "Don't know how to resolve AMI for version: %s" % version # TODO(pwendell) Once we have multiple Spark AMI versions, we should let From a07bd286bb899e99f5992b163a74497047fce448 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sat, 18 May 2013 16:00:32 -0700 Subject: [PATCH 010/136] Some changes in response to Josh --- ec2/spark_ec2.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index d05b767cb8..7df7ae2ae4 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -186,12 +186,11 @@ def get_spark_ami(opts): instance_type = "pvm" print >> stderr,\ "Don't recognize %s, assuming type is pvm" % opts.instance_type - if version not in ["latest", "v0.7.0"]: + + version = version.replace("v", "") + if version not in ["latest", "0.7.0"]: print >> stderr, \ "Don't know how to resolve AMI for version: %s" % version - # TODO(pwendell) Once we have multiple Spark AMI versions, we should let - # people give a version flag here in place of just saying 'latest'. 
- version = version[1:] ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version, opts.region, instance_type) try: ami = urllib2.urlopen(ami_path).read().strip() @@ -253,7 +252,7 @@ def launch_cluster(conn, opts, cluster_name): sys.exit(1) # Figure out Spark AMI - if opts.ami[0] == "v": + if "ami" not in opts.ami: opts.ami = get_spark_ami(opts) print "Launching instances..." From db5037973728f53f234fe3381ba36122e1f47bba Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 7 Jun 2013 11:54:16 -0700 Subject: [PATCH 011/136] Decoupling spark version from ami --- ec2/spark_ec2.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 7df7ae2ae4..89bb12f157 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -36,7 +36,7 @@ from boto import ec2 # A URL prefix from which to fetch AMI information AMI_PREFIX = "https://raw.github.com/pwendell/spark-ec2/ec2-updates/ami-list" - +LATEST_SPARK_VERSION = "0.7.2" # Configure and parse our command-line arguments def parse_args(): @@ -67,6 +67,8 @@ def parse_args(): parser.add_option("-a", "--ami", default="latest", help="Amazon Machine Image ID to use, 'vX.Y.Z' to use version " + "X.Y.Z of Spark, or 'latest' to use latest AMI (default: latest)") + parser.add_option("-v", "--spark-version", default="latest", + help="Version of Spark to use (X.Y.Z or 'latest' to use most recent)") parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + "the given local address (for use with login)") @@ -156,6 +158,16 @@ def wait_for_instances(conn, instances): def is_active(instance): return (instance.state in ['pending', 'running', 'stopping', 'stopped']) +# Return correct versions of Spark and Shark, given the supplied spark version +def get_spark_shark_version(opts): + spark_shark_map = ["0.7.2", "0.7.0"] + version = opts.spark_version.replace("v", "") + if version not in ["latest", "0.7.2"]: + print >> stderr, "Don't know about spark version: %s" % version + if version == "latest": + version = LATEST_SPARK_VERSION + return (version, spark_shark_map[version]) + # Attempt to resolve an appropriate AMI given the architecture and # region of the request. 
def get_spark_ami(opts): @@ -187,11 +199,7 @@ def get_spark_ami(opts): print >> stderr,\ "Don't recognize %s, assuming type is pvm" % opts.instance_type - version = version.replace("v", "") - if version not in ["latest", "0.7.0"]: - print >> stderr, \ - "Don't know how to resolve AMI for version: %s" % version - ami_path = "%s/%s/%s/%s" % (AMI_PREFIX, version, opts.region, instance_type) + ami_path = "%s/%s/%s" % (AMI_PREFIX, opts.region, instance_type) try: ami = urllib2.urlopen(ami_path).read().strip() print "Spark AMI: " + ami @@ -452,8 +460,9 @@ def setup_standalone_cluster(master, slave_nodes, opts): ssh(master, opts, "/root/spark/bin/start-all.sh") def setup_spark_cluster(master, opts): + (spark_v, shark_v) = get_spark_shark_version(opts) ssh(master, opts, "chmod u+x spark-ec2/setup.sh") - ssh(master, opts, "spark-ec2/setup.sh") + ssh(master, opts, "spark-ec2/setup.sh %s %s" % (spark_v, shark_v))) if opts.cluster_type == "mesos": print "Mesos cluster started at http://%s:8080" % master elif opts.cluster_type == "standalone": From 24777362372482c5c8af12e4908b7d9681211002 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 7 Jun 2013 11:55:46 -0700 Subject: [PATCH 012/136] Adding spark shark modules --- ec2/spark_ec2.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 89bb12f157..111090dd77 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -422,10 +422,11 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k ssh(master, opts, 'chmod 600 ~/.ssh/id_rsa') if opts.cluster_type == "mesos": - modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mapreduce', 'mesos'] + modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', + 'mapreduce', 'mesos'] elif opts.cluster_type == "standalone": - modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mapreduce', - 'spark-standalone'] + modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', + 'mapreduce', 'spark-standalone'] if opts.ganglia: modules.append('ganglia') From d65d3dd8f083a826d7c8de0da8ae4680b14254d6 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 7 Jun 2013 14:16:01 -0700 Subject: [PATCH 013/136] Fixing syntax error --- ec2/spark_ec2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 111090dd77..2243da7776 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -160,7 +160,7 @@ def is_active(instance): # Return correct versions of Spark and Shark, given the supplied spark version def get_spark_shark_version(opts): - spark_shark_map = ["0.7.2", "0.7.0"] + spark_shark_map = {"0.7.2": "0.7.0"} version = opts.spark_version.replace("v", "") if version not in ["latest", "0.7.2"]: print >> stderr, "Don't know about spark version: %s" % version @@ -463,7 +463,7 @@ def setup_standalone_cluster(master, slave_nodes, opts): def setup_spark_cluster(master, opts): (spark_v, shark_v) = get_spark_shark_version(opts) ssh(master, opts, "chmod u+x spark-ec2/setup.sh") - ssh(master, opts, "spark-ec2/setup.sh %s %s" % (spark_v, shark_v))) + ssh(master, opts, "spark-ec2/setup.sh %s %s" % (spark_v, shark_v)) if opts.cluster_type == "mesos": print "Mesos cluster started at http://%s:8080" % master elif opts.cluster_type == "standalone": From 0f4c0e461f7f4b9603bc26d274c144142e3dec16 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 7 Jun 2013 14:39:58 -0700 Subject: [PATCH 014/136] Removing now defunct ami documentation --- ec2/spark_ec2.py | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 2243da7776..ff63e69d49 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -65,8 +65,7 @@ def parse_args(): "slaves across multiple (an additional $0.01/Gb for bandwidth" + "between zones applies)") parser.add_option("-a", "--ami", default="latest", - help="Amazon Machine Image ID to use, 'vX.Y.Z' to use version " + - "X.Y.Z of Spark, or 'latest' to use latest AMI (default: latest)") + help="Amazon Machine Image ID to use (default: latest)") parser.add_option("-v", "--spark-version", default="latest", help="Version of Spark to use (X.Y.Z or 'latest' to use most recent)") parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", From 8637c021910c16625feabd587d93afa4838e68af Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 7 Jun 2013 14:54:26 -0700 Subject: [PATCH 015/136] Minor doc cleanup --- ec2/spark_ec2.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ff63e69d49..5c9817113a 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -157,12 +157,13 @@ def wait_for_instances(conn, instances): def is_active(instance): return (instance.state in ['pending', 'running', 'stopping', 'stopped']) -# Return correct versions of Spark and Shark, given the supplied spark version +# Return correct versions of Spark and Shark, given the supplied Spark version def get_spark_shark_version(opts): spark_shark_map = {"0.7.2": "0.7.0"} version = opts.spark_version.replace("v", "") if version not in ["latest", "0.7.2"]: - print >> stderr, "Don't know about spark version: %s" % version + print >> stderr, "Don't know about Spark version: %s" % version + sys.exit(1) if version == "latest": version = LATEST_SPARK_VERSION return (version, spark_shark_map[version]) @@ -433,6 +434,8 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k if not opts.old_scripts: # NOTE: We should clone the repository before running deploy_files to # prevent ec2-variables.sh from being overwritten + # TODO: Before being merged this should be replaced with the correct repo, + # and likely a new branch (to allow backwards compatibility). ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/pwendell/spark-ec2.git -b ec2-updates") print "Deploying files to master..." From 2124563a0006a4d7cd667787633befe80ce4b7fa Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 11 Jun 2013 17:52:10 -0700 Subject: [PATCH 016/136] Remving support for old scripts --- ec2/spark_ec2.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 5c9817113a..e4b31fcf71 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -431,25 +431,18 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k if opts.ganglia: modules.append('ganglia') - if not opts.old_scripts: - # NOTE: We should clone the repository before running deploy_files to - # prevent ec2-variables.sh from being overwritten - # TODO: Before being merged this should be replaced with the correct repo, - # and likely a new branch (to allow backwards compatibility). 
- ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/pwendell/spark-ec2.git -b ec2-updates") + # NOTE: We should clone the repository before running deploy_files to + # prevent ec2-variables.sh from being overwritten + # TODO: Before being merged this should be replaced with the correct repo, + # and likely a new branch (to allow backwards compatibility). + ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/pwendell/spark-ec2.git -b ec2-updates") print "Deploying files to master..." deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, zoo_nodes, modules) print "Running setup on master..." - if opts.old_scripts: - if opts.cluster_type == "mesos": - setup_mesos_cluster(master, opts) - elif opts.cluster_type == "standalone": - setup_standalone_cluster(master, slave_nodes, opts) - else: - setup_spark_cluster(master, opts) + setup_spark_cluster(master, opts) print "Done!" def setup_mesos_cluster(master, opts): From bb328c8ff8aeb74c4ecc38a30bbb85f1c550ac3d Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 11 Jun 2013 19:53:23 -0700 Subject: [PATCH 017/136] Re-working spark versions to use templates --- .../root/spark-ec2/ec2-variables.sh | 2 ++ ec2/spark_ec2.py | 32 +++++++++++++------ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh index 166a884c88..453712bd25 100644 --- a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh +++ b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh @@ -8,4 +8,6 @@ export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}" export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}" export MODULES="{{modules}}" +export SPARK_VERSION="{{spark_version}}" +export SHARK_VERSION="{{shark_version}}" export SWAP_MB="{{swap}}" diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index e4b31fcf71..5c728fb846 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -36,7 +36,6 @@ from boto import ec2 # A URL prefix from which to fetch AMI information AMI_PREFIX = "https://raw.github.com/pwendell/spark-ec2/ec2-updates/ami-list" -LATEST_SPARK_VERSION = "0.7.2" # Configure and parse our command-line arguments def parse_args(): @@ -66,8 +65,18 @@ def parse_args(): "between zones applies)") parser.add_option("-a", "--ami", default="latest", help="Amazon Machine Image ID to use (default: latest)") - parser.add_option("-v", "--spark-version", default="latest", - help="Version of Spark to use (X.Y.Z or 'latest' to use most recent)") + + parser.add_option("-v", "--spark-version", default="0.7.2", + help="Version of Spark to use: 'X.Y.Z' or a specific git hash") + parser.add_option("--spark-git-repo", + default="https://github.com/mesos/spark", + help="Github repo from which to checkout supplied commit hash") + parser.add_option("--shark-version", + help="Git hash of shark version. 
Used only if spark hash is also given.") + parser.add_option("--shark-git-repo", + default="https://github.com/amplab/shark", + help="Github repo from which to checkout supplied commit hash") + parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + "the given local address (for use with login)") @@ -161,11 +170,9 @@ def is_active(instance): def get_spark_shark_version(opts): spark_shark_map = {"0.7.2": "0.7.0"} version = opts.spark_version.replace("v", "") - if version not in ["latest", "0.7.2"]: + if version not in ["0.7.2"]: print >> stderr, "Don't know about Spark version: %s" % version sys.exit(1) - if version == "latest": - version = LATEST_SPARK_VERSION return (version, spark_shark_map[version]) # Attempt to resolve an appropriate AMI given the architecture and @@ -456,9 +463,8 @@ def setup_standalone_cluster(master, slave_nodes, opts): ssh(master, opts, "/root/spark/bin/start-all.sh") def setup_spark_cluster(master, opts): - (spark_v, shark_v) = get_spark_shark_version(opts) ssh(master, opts, "chmod u+x spark-ec2/setup.sh") - ssh(master, opts, "spark-ec2/setup.sh %s %s" % (spark_v, shark_v)) + ssh(master, opts, "spark-ec2/setup.sh") if opts.cluster_type == "mesos": print "Mesos cluster started at http://%s:8080" % master elif opts.cluster_type == "standalone": @@ -541,6 +547,12 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, zoo_list = "NONE" cluster_url = "%s:7077" % active_master + if "." in opts.spark_version: + (spark_v, shark_v) = get_spark_shark_version(opts) + else: + spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version) + shark_v = "%s|%s" % (opts.shark_git_repo, opts.shark_version) + template_vars = { "master_list": '\n'.join([i.public_dns_name for i in master_nodes]), "active_master": active_master, @@ -551,7 +563,9 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, "mapred_local_dirs": mapred_local_dirs, "spark_local_dirs": spark_local_dirs, "swap": str(opts.swap), - "modules": '\n'.join(modules) + "modules": '\n'.join(modules), + "spark_version": spark_v, + "shark_version": shark_v } # Create a temp directory in which we will place all the files to be From eae7590d0d7c3093bcedefdf2ec4f75d7a8c62b8 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 12 Jun 2013 18:39:01 -0700 Subject: [PATCH 018/136] Support for multiple hadoop versions --- ec2/deploy.generic/root/spark-ec2/ec2-variables.sh | 1 + ec2/spark_ec2.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh index 453712bd25..72bf911a3f 100644 --- a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh +++ b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh @@ -10,4 +10,5 @@ export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}" export MODULES="{{modules}}" export SPARK_VERSION="{{spark_version}}" export SHARK_VERSION="{{shark_version}}" +export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}" export SWAP_MB="{{swap}}" diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 5c728fb846..7d2de3e7d2 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -76,6 +76,8 @@ def parse_args(): parser.add_option("--shark-git-repo", default="https://github.com/amplab/shark", help="Github repo from which to checkout supplied commit hash") + parser.add_option("--hadoop-major-version", default="2", + help="Major version of Hadoop (default: 2)") 
parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + @@ -565,7 +567,8 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, "swap": str(opts.swap), "modules": '\n'.join(modules), "spark_version": spark_v, - "shark_version": shark_v + "shark_version": shark_v, + "hadoop_major_version": opts.hadoop_major_version } # Create a temp directory in which we will place all the files to be From d5f74aa9b7d806e2d189f20dcf1b619653507650 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 13 Jun 2013 10:30:59 -0700 Subject: [PATCH 019/136] Cleanup of AMI tag --- ec2/spark_ec2.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 7d2de3e7d2..0b4dc2077e 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -63,8 +63,8 @@ def parse_args(): help="Availability zone to launch instances in, or 'all' to spread " + "slaves across multiple (an additional $0.01/Gb for bandwidth" + "between zones applies)") - parser.add_option("-a", "--ami", default="latest", - help="Amazon Machine Image ID to use (default: latest)") + parser.add_option("-a", "--ami", + help="Amazon Machine Image ID to use") parser.add_option("-v", "--spark-version", default="0.7.2", help="Version of Spark to use: 'X.Y.Z' or a specific git hash") @@ -180,7 +180,6 @@ def get_spark_shark_version(opts): # Attempt to resolve an appropriate AMI given the architecture and # region of the request. def get_spark_ami(opts): - version = opts.ami instance_types = { "m1.small": "pvm", "m1.medium": "pvm", @@ -269,7 +268,7 @@ def launch_cluster(conn, opts, cluster_name): sys.exit(1) # Figure out Spark AMI - if "ami" not in opts.ami: + if opts.ami is None: opts.ami = get_spark_ami(opts) print "Launching instances..." From 13e3dd98d86a89f9b2d2e6c9a8f03bd0b79fba70 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 26 Jun 2013 11:15:48 -0700 Subject: [PATCH 020/136] Removing mesos support --- .../root/mesos-ec2/ec2-variables.sh | 9 -- ec2/spark_ec2.py | 130 ++++-------------- 2 files changed, 30 insertions(+), 109 deletions(-) delete mode 100644 ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh diff --git a/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh b/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh deleted file mode 100644 index 50ecf83404..0000000000 --- a/ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# These variables are automatically filled in by the mesos-ec2 script. 
-export MESOS_MASTERS="{{master_list}}" -export MESOS_SLAVES="{{slave_list}}" -export MESOS_ZOO_LIST="{{zoo_list}}" -export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}" -export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" -export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}" diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 0b4dc2077e..d8890f26f4 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -94,17 +94,11 @@ def parse_args(): parser.add_option("--spot-price", metavar="PRICE", type="float", help="If specified, launch slaves as spot instances with the given " + "maximum price (in dollars)") - parser.add_option("--cluster-type", type="choice", metavar="TYPE", - choices=["mesos", "standalone"], default="standalone", - help="'mesos' for a Mesos cluster, 'standalone' for a standalone " + - "Spark cluster (default: standalone)") parser.add_option("--ganglia", action="store_true", default=True, help="Setup Ganglia monitoring on cluster (default: on). NOTE: " + "the Ganglia page will be publicly accessible") parser.add_option("--no-ganglia", action="store_false", dest="ganglia", help="Disable Ganglia monitoring for the cluster") - parser.add_option("--old-scripts", action="store_true", default=False, - help="Use old mesos-ec2 scripts, for Spark <= 0.6 AMIs") parser.add_option("-u", "--user", default="root", help="The SSH user you want to connect as (default: root)") parser.add_option("--delete-groups", action="store_true", default=False, @@ -119,9 +113,6 @@ def parse_args(): print >> stderr, ("ERROR: The -i or --identity-file argument is " + "required for " + action) sys.exit(1) - if opts.cluster_type not in ["mesos", "standalone"] and action == "launch": - print >> stderr, ("ERROR: Invalid cluster type: " + opts.cluster_type) - sys.exit(1) # Boto config check # http://boto.cloudhackers.com/en/latest/boto_config_tut.html @@ -219,52 +210,38 @@ def get_spark_ami(opts): # Launch a cluster of the given name, by setting up its security groups, # and then starting new instances in them. -# Returns a tuple of EC2 reservation objects for the master, slave -# and zookeeper instances (in that order). +# Returns a tuple of EC2 reservation objects for the master and slaves # Fails if there already instances running in the cluster's groups. def launch_cluster(conn, opts, cluster_name): print "Setting up security groups..." 
master_group = get_or_make_group(conn, cluster_name + "-master") slave_group = get_or_make_group(conn, cluster_name + "-slaves") - zoo_group = get_or_make_group(conn, cluster_name + "-zoo") if master_group.rules == []: # Group was just now created master_group.authorize(src_group=master_group) master_group.authorize(src_group=slave_group) - master_group.authorize(src_group=zoo_group) master_group.authorize('tcp', 22, 22, '0.0.0.0/0') master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0') master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0') master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0') master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0') - if opts.cluster_type == "mesos": - master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0') if opts.ganglia: master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0') if slave_group.rules == []: # Group was just now created slave_group.authorize(src_group=master_group) slave_group.authorize(src_group=slave_group) - slave_group.authorize(src_group=zoo_group) slave_group.authorize('tcp', 22, 22, '0.0.0.0/0') slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0') slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0') slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0') slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0') slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0') - if zoo_group.rules == []: # Group was just now created - zoo_group.authorize(src_group=master_group) - zoo_group.authorize(src_group=slave_group) - zoo_group.authorize(src_group=zoo_group) - zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0') - zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0') - zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0') - zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0') # Check if instances are already running in our groups active_nodes = get_existing_cluster(conn, opts, cluster_name, die_on_error=False) if any(active_nodes): print >> stderr, ("ERROR: There are already instances running in " + - "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name)) + "group %s, %s or %s" % (master_group.name, slave_group.name)) sys.exit(1) # Figure out Spark AMI @@ -336,9 +313,9 @@ def launch_cluster(conn, opts, cluster_name): print "Canceling spot instance requests" conn.cancel_spot_instance_requests(my_req_ids) # Log a warning if any of these requests actually launched instances: - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name, die_on_error=False) - running = len(master_nodes) + len(slave_nodes) + len(zoo_nodes) + running = len(master_nodes) + len(slave_nodes) if running: print >> stderr, ("WARNING: %d instances are still running" % running) sys.exit(0) @@ -379,21 +356,17 @@ def launch_cluster(conn, opts, cluster_name): master_nodes = master_res.instances print "Launched master in %s, regid = %s" % (zone, master_res.id) - zoo_nodes = [] - # Return all the instances - return (master_nodes, slave_nodes, zoo_nodes) + return (master_nodes, slave_nodes) # Get the EC2 instances in an existing cluster if available. -# Returns a tuple of lists of EC2 instance objects for the masters, -# slaves and zookeeper nodes (in that order). +# Returns a tuple of lists of EC2 instance objects for the masters and slaves def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): print "Searching for existing cluster " + cluster_name + "..." 
reservations = conn.get_all_instances() master_nodes = [] slave_nodes = [] - zoo_nodes = [] for res in reservations: active = [i for i in res.instances if is_active(i)] if len(active) > 0: @@ -402,13 +375,11 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): master_nodes += res.instances elif group_names == [cluster_name + "-slaves"]: slave_nodes += res.instances - elif group_names == [cluster_name + "-zoo"]: - zoo_nodes += res.instances - if any((master_nodes, slave_nodes, zoo_nodes)): - print ("Found %d master(s), %d slaves, %d ZooKeeper nodes" % - (len(master_nodes), len(slave_nodes), len(zoo_nodes))) + if any((master_nodes, slave_nodes)): + print ("Found %d master(s), %d slaves" % + (len(master_nodes), len(slave_nodes))) if (master_nodes != [] and slave_nodes != []) or not die_on_error: - return (master_nodes, slave_nodes, zoo_nodes) + return (master_nodes, slave_nodes) else: if master_nodes == [] and slave_nodes != []: print "ERROR: Could not find master in group " + cluster_name + "-master" @@ -421,7 +392,7 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): # Deploy configuration files and run setup scripts on a newly launched # or started EC2 cluster. -def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_key): +def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): master = master_nodes[0].public_dns_name if deploy_ssh_key: print "Copying SSH key %s to master..." % opts.identity_file @@ -429,12 +400,8 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k scp(master, opts, opts.identity_file, '~/.ssh/id_rsa') ssh(master, opts, 'chmod 600 ~/.ssh/id_rsa') - if opts.cluster_type == "mesos": - modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', - 'mapreduce', 'mesos'] - elif opts.cluster_type == "standalone": - modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', - 'mapreduce', 'spark-standalone'] + modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', + 'mapreduce', 'spark-standalone'] if opts.ganglia: modules.append('ganglia') @@ -446,18 +413,12 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/pwendell/spark-ec2.git -b ec2-updates") print "Deploying files to master..." - deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, - zoo_nodes, modules) + deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules) print "Running setup on master..." setup_spark_cluster(master, opts) print "Done!" 
-def setup_mesos_cluster(master, opts): - ssh(master, opts, "chmod u+x mesos-ec2/setup") - ssh(master, opts, "mesos-ec2/setup %s %s %s %s" % - ("generic", "none", "master", opts.swap)) - def setup_standalone_cluster(master, slave_nodes, opts): slave_ips = '\n'.join([i.public_dns_name for i in slave_nodes]) ssh(master, opts, "echo \"%s\" > spark/conf/slaves" % (slave_ips)) @@ -466,23 +427,18 @@ def setup_standalone_cluster(master, slave_nodes, opts): def setup_spark_cluster(master, opts): ssh(master, opts, "chmod u+x spark-ec2/setup.sh") ssh(master, opts, "spark-ec2/setup.sh") - if opts.cluster_type == "mesos": - print "Mesos cluster started at http://%s:8080" % master - elif opts.cluster_type == "standalone": - print "Spark standalone cluster started at http://%s:8080" % master + print "Spark standalone cluster started at http://%s:8080" % master if opts.ganglia: print "Ganglia started at http://%s:5080/ganglia" % master # Wait for a whole cluster (masters, slaves and ZooKeeper) to start up -def wait_for_cluster(conn, wait_secs, master_nodes, slave_nodes, zoo_nodes): +def wait_for_cluster(conn, wait_secs, master_nodes, slave_nodes): print "Waiting for instances to start up..." time.sleep(5) wait_for_instances(conn, master_nodes) wait_for_instances(conn, slave_nodes) - if zoo_nodes != []: - wait_for_instances(conn, zoo_nodes) print "Waiting %d more seconds..." % wait_secs time.sleep(wait_secs) @@ -523,8 +479,7 @@ def get_num_disks(instance_type): # cluster (e.g. lists of masters and slaves). Files are only deployed to # the first master instance in the cluster, and we expect the setup # script to be run on that instance to copy them to other nodes. -def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, - modules): +def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): active_master = master_nodes[0].public_dns_name num_disks = get_num_disks(opts.instance_type) @@ -537,16 +492,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, mapred_local_dirs += ",/mnt%d/hadoop/mrlocal" % i spark_local_dirs += ",/mnt%d/spark" % i - if zoo_nodes != []: - zoo_list = '\n'.join([i.public_dns_name for i in zoo_nodes]) - cluster_url = "zoo://" + ",".join( - ["%s:2181/mesos" % i.public_dns_name for i in zoo_nodes]) - elif opts.cluster_type == "mesos": - zoo_list = "NONE" - cluster_url = "%s:5050" % active_master - elif opts.cluster_type == "standalone": - zoo_list = "NONE" - cluster_url = "%s:7077" % active_master + cluster_url = "%s:7077" % active_master if "." 
in opts.spark_version: (spark_v, shark_v) = get_spark_shark_version(opts) @@ -558,7 +504,6 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes, "master_list": '\n'.join([i.public_dns_name for i in master_nodes]), "active_master": active_master, "slave_list": '\n'.join([i.public_dns_name for i in slave_nodes]), - "zoo_list": zoo_list, "cluster_url": cluster_url, "hdfs_data_dirs": hdfs_data_dirs, "mapred_local_dirs": mapred_local_dirs, @@ -656,20 +601,20 @@ def main(): if action == "launch": if opts.resume: - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name) else: - (master_nodes, slave_nodes, zoo_nodes) = launch_cluster( + (master_nodes, slave_nodes) = launch_cluster( conn, opts, cluster_name) - wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes, zoo_nodes) - setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, True) + wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes) + setup_cluster(conn, master_nodes, slave_nodes, opts, True) elif action == "destroy": response = raw_input("Are you sure you want to destroy the cluster " + cluster_name + "?\nALL DATA ON ALL NODES WILL BE LOST!!\n" + "Destroy cluster " + cluster_name + " (y/N): ") if response == "y": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name, die_on_error=False) print "Terminating master..." for inst in master_nodes: @@ -677,15 +622,11 @@ def main(): print "Terminating slaves..." for inst in slave_nodes: inst.terminate() - if zoo_nodes != []: - print "Terminating zoo..." - for inst in zoo_nodes: - inst.terminate() # Delete security groups as well if opts.delete_groups: print "Deleting security groups (this will take some time)..." - group_names = [cluster_name + "-master", cluster_name + "-slaves", cluster_name + "-zoo"] + group_names = [cluster_name + "-master", cluster_name + "-slaves"] attempt = 1; while attempt <= 3: @@ -725,7 +666,7 @@ def main(): print "Try re-running in a few minutes." elif action == "login": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Logging into master " + master + "..." @@ -736,7 +677,7 @@ def main(): (opts.identity_file, proxy_opt, opts.user, master), shell=True) elif action == "get-master": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(conn, opts, cluster_name) + (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) print master_nodes[0].public_dns_name elif action == "stop": @@ -746,7 +687,7 @@ def main(): "AMAZON EBS IF IT IS EBS-BACKED!!\n" + "Stop cluster " + cluster_name + " (y/N): ") if response == "y": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( + (master_nodes, slave_nodes) = get_existing_cluster( conn, opts, cluster_name, die_on_error=False) print "Stopping master..." for inst in master_nodes: @@ -756,15 +697,9 @@ def main(): for inst in slave_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.stop() - if zoo_nodes != []: - print "Stopping zoo..." 
- for inst in zoo_nodes: - if inst.state not in ["shutting-down", "terminated"]: - inst.stop() elif action == "start": - (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( - conn, opts, cluster_name) + (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) print "Starting slaves..." for inst in slave_nodes: if inst.state not in ["shutting-down", "terminated"]: @@ -773,13 +708,8 @@ def main(): for inst in master_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.start() - if zoo_nodes != []: - print "Starting zoo..." - for inst in zoo_nodes: - if inst.state not in ["shutting-down", "terminated"]: - inst.start() - wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes, zoo_nodes) - setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, False) + wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes) + setup_cluster(conn, master_nodes, slave_nodes, opts, False) else: print >> stderr, "Invalid action: %s" % action From 311e63f2c7eb570c83184fa6698461ffd3b08971 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 11 Jul 2013 14:13:10 -0700 Subject: [PATCH 021/136] Removing mesos terminology --- ec2/deploy.generic/root/spark-ec2/ec2-variables.sh | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh index 72bf911a3f..5ea0776d08 100644 --- a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh +++ b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh @@ -1,12 +1,11 @@ #!/bin/bash -# These variables are automatically filled in by the mesos-ec2 script. -export MESOS_MASTERS="{{master_list}}" -export MESOS_SLAVES="{{slave_list}}" -export MESOS_ZOO_LIST="{{zoo_list}}" -export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}" -export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" -export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}" +# These variables are automatically filled in by the spark-ec2 script. +export MASTERS="{{master_list}}" +export SLAVES="{{slave_list}}" +export HDFS_DATA_DIRS="{{hdfs_data_dirs}}" +export MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" +export SPARK_LOCAL_DIRS="{{spark_local_dirs}}" export MODULES="{{modules}}" export SPARK_VERSION="{{spark_version}}" export SHARK_VERSION="{{shark_version}}" From f1689185d612fbdcc3234b15eb85e1cc909a0fb1 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 11 Jul 2013 15:44:31 -0700 Subject: [PATCH 022/136] Adding ability to custom deploy only Spark --- ec2/spark_ec2.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index d8890f26f4..dd75c49abe 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -495,8 +495,15 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): cluster_url = "%s:7077" % active_master if "." 
in opts.spark_version: + # Pre-built deploy (spark_v, shark_v) = get_spark_shark_version(opts) + else if opts.shark_version is None: + # Spark-only custom deploy + spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version) + shark_v = "" + modules = filter(lambda x: x != "shark", modules) else: + # Spark and Shark custom deploy spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version) shark_v = "%s|%s" % (opts.shark_git_repo, opts.shark_version) From 13809d363c0a5b3212751561e0ecc09c601caf8a Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 11 Jul 2013 15:45:35 -0700 Subject: [PATCH 023/136] Small fix --- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index dd75c49abe..425d397c3e 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -497,7 +497,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): if "." in opts.spark_version: # Pre-built deploy (spark_v, shark_v) = get_spark_shark_version(opts) - else if opts.shark_version is None: + elif opts.shark_version is None: # Spark-only custom deploy spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version) shark_v = "" From 31c18a2528bf0e89a96857bab14672617a2dd35d Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 11 Jul 2013 16:18:16 -0700 Subject: [PATCH 024/136] Removing support for custom Shark version --- ec2/spark_ec2.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 425d397c3e..0ce5ce867f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -71,11 +71,6 @@ def parse_args(): parser.add_option("--spark-git-repo", default="https://github.com/mesos/spark", help="Github repo from which to checkout supplied commit hash") - parser.add_option("--shark-version", - help="Git hash of shark version. Used only if spark hash is also given.") - parser.add_option("--shark-git-repo", - default="https://github.com/amplab/shark", - help="Github repo from which to checkout supplied commit hash") parser.add_option("--hadoop-major-version", default="2", help="Major version of Hadoop (default: 2)") @@ -495,17 +490,13 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): cluster_url = "%s:7077" % active_master if "." 
in opts.spark_version: - # Pre-built deploy + # Pre-built spark & shark deploy (spark_v, shark_v) = get_spark_shark_version(opts) - elif opts.shark_version is None: + else: # Spark-only custom deploy spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version) shark_v = "" modules = filter(lambda x: x != "shark", modules) - else: - # Spark and Shark custom deploy - spark_v = "%s|%s" % (opts.spark_git_repo, opts.spark_version) - shark_v = "%s|%s" % (opts.shark_git_repo, opts.shark_version) template_vars = { "master_list": '\n'.join([i.public_dns_name for i in master_nodes]), From 8dccee16af14284eff0b9dbddcde6c8303abb474 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 11 Jul 2013 16:50:27 -0700 Subject: [PATCH 025/136] Bug fix --- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 0ce5ce867f..413fcf2695 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -236,7 +236,7 @@ def launch_cluster(conn, opts, cluster_name): die_on_error=False) if any(active_nodes): print >> stderr, ("ERROR: There are already instances running in " + - "group %s, %s or %s" % (master_group.name, slave_group.name)) + "group %s or %s" % (master_group.name, slave_group.name)) sys.exit(1) # Figure out Spark AMI From 31b3c1df54fb2d3cb0aac9acbc0a13c274c64cd2 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 17 Jul 2013 17:43:15 -0700 Subject: [PATCH 026/136] Small improvement in python script --- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 413fcf2695..d1db535e8c 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -158,7 +158,7 @@ def is_active(instance): def get_spark_shark_version(opts): spark_shark_map = {"0.7.2": "0.7.0"} version = opts.spark_version.replace("v", "") - if version not in ["0.7.2"]: + if version not in spark_shark_map: print >> stderr, "Don't know about Spark version: %s" % version sys.exit(1) return (version, spark_shark_map[version]) From 893aaff7386e6ae72199c9bd9497369abd180c96 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 30 Jul 2013 14:55:00 -0700 Subject: [PATCH 027/136] Don't install MapReduce for Hadoop1 --- ec2/spark_ec2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index d1db535e8c..c32c4400d7 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -398,6 +398,9 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', 'mapreduce', 'spark-standalone'] + if opts.hadoop_major_version == "1": + modules = filter(lambda x: x != "mapreduce", modules) + if opts.ganglia: modules.append('ganglia') From b7b627d5bb1a1331ea580950834533f84735df4c Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 31 Jul 2013 21:28:27 -0700 Subject: [PATCH 028/136] Updating relevant documentation --- docs/ec2-scripts.md | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md index dc57035eba..bae41f9406 100644 --- a/docs/ec2-scripts.md +++ b/docs/ec2-scripts.md @@ -4,10 +4,11 @@ title: Running Spark on EC2 --- The `spark-ec2` script, located in Spark's `ec2` directory, allows you -to launch, manage and shut down Spark clusters on Amazon EC2. It automatically sets up Mesos, Spark and HDFS -on the cluster for you. 
-This guide describes how to use `spark-ec2` to launch clusters, how to run jobs on them, and how to shut them down. -It assumes you've already signed up for an EC2 account on the [Amazon Web Services site](http://aws.amazon.com/). +to launch, manage and shut down Spark clusters on Amazon EC2. It automatically +sets up Spark, Shark and HDFS on the cluster for you. This guide describes +how to use `spark-ec2` to launch clusters, how to run jobs on them, and how +to shut them down. It assumes you've already signed up for an EC2 account +on the [Amazon Web Services site](http://aws.amazon.com/). `spark-ec2` is designed to manage multiple named clusters. You can launch a new cluster (telling the script its size and giving it a name), @@ -59,18 +60,22 @@ RAM). Refer to the Amazon pages about [EC2 instance types](http://aws.amazon.com/ec2/instance-types) and [EC2 pricing](http://aws.amazon.com/ec2/#pricing) for information about other instance types. +- `--region=` specifies an EC2 region in which to launch +instances. The default region is `us-east-1`. - `--zone=` can be used to specify an EC2 availability zone to launch instances in. Sometimes, you will get an error because there is not enough capacity in one zone, and you should try to launch in -another. This happens mostly with the `m1.large` instance types; -extra-large (both `m1.xlarge` and `c1.xlarge`) instances tend to be more -available. +another. - `--ebs-vol-size=GB` will attach an EBS volume with a given amount of space to each node so that you can have a persistent HDFS cluster on your nodes across cluster restarts (see below). - `--spot-price=PRICE` will launch the worker nodes as [Spot Instances](http://aws.amazon.com/ec2/spot-instances/), bidding for the given maximum price (in dollars). +- `--spark-version=VERSION` will pre-load the cluster with the + specified version of Spark. VERSION can be a version number + (e.g. "0.7.2") or a specific git hash. By default, a recent + version will be used. - If one of your launches fails due to e.g. not having the right permissions on your private key file, you can run `launch` with the `--resume` option to restart the setup process on an existing cluster. @@ -99,9 +104,8 @@ permissions on your private key file, you can run `launch` with the `spark-ec2` to attach a persistent EBS volume to each node for storing the persistent HDFS. - Finally, if you get errors while running your jobs, look at the slave's logs - for that job inside of the Mesos work directory (/mnt/mesos-work). You can - also view the status of the cluster using the Mesos web UI - (`http://:8080`). + for that job inside of the scheduler work directory (/root/spark/work). You can + also view the status of the cluster using the web UI: `http://:8080`. # Configuration @@ -141,22 +145,14 @@ section. # Limitations -- `spark-ec2` currently only launches machines in the US-East region of EC2. - It should not be hard to make it launch VMs in other zones, but you will need - to create your own AMIs in them. - Support for "cluster compute" nodes is limited -- there's no way to specify a locality group. However, you can launch slave nodes in your `-slaves` group manually and then use `spark-ec2 launch --resume` to start a cluster with them. -- Support for spot instances is limited. If you have a patch or suggestion for one of these limitations, feel free to [contribute](contributing-to-spark.html) it! -# Using a Newer Spark Version - -The Spark EC2 machine images may not come with the latest version of Spark. 
To use a newer version, you can run `git pull` to pull in `/root/spark` to pull in the latest version of Spark from `git`, and build it using `sbt/sbt compile`. You will also need to copy it to all the other nodes in the cluster using `~/spark-ec2/copy-dir /root/spark`. - # Accessing Data in S3 Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n:///path`. You will also need to set your Amazon security credentials, either by setting the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` before your program or through `SparkContext.hadoopConfiguration`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3). From 56715587e4573f2a01dfacb9c13f622cfa4ba119 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 1 Aug 2013 12:27:43 -0700 Subject: [PATCH 029/136] Pointing to new EC2 repository --- ec2/spark_ec2.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 4d49c60703..677c8f5452 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -408,9 +408,7 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): # NOTE: We should clone the repository before running deploy_files to # prevent ec2-variables.sh from being overwritten - # TODO: Before being merged this should be replaced with the correct repo, - # and likely a new branch (to allow backwards compatibility). - ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/pwendell/spark-ec2.git -b ec2-updates") + ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v2") print "Deploying files to master..." 
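As a concrete illustration of the "Accessing Data in S3" instructions in the documentation patched above: a minimal Scala sketch, assuming a placeholder bucket and path and credentials taken from the environment (the fs.s3n.* property names apply to s3n:// URIs; the fs.s3.* variants cover s3://):

    import spark.SparkContext

    object S3ReadSketch {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "S3 read sketch")
        // Credentials can come from AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY in the
        // environment, or be set explicitly on the SparkContext's reused Hadoop config.
        sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID"))
        sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY"))
        // "my-bucket" and "logs" are placeholders, not paths from this patch series.
        val lines = sc.textFile("s3n://my-bucket/logs")
        println("Line count: " + lines.count())
      }
    }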
deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules) From 11057662952db09e121548448d15b9dafb63ffa1 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 1 Aug 2013 15:43:41 -0700 Subject: [PATCH 030/136] Point to new repository/branch --- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 677c8f5452..54ab024b13 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -37,7 +37,7 @@ from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType from boto import ec2 # A URL prefix from which to fetch AMI information -AMI_PREFIX = "https://raw.github.com/pwendell/spark-ec2/ec2-updates/ami-list" +AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list" # Configure and parse our command-line arguments def parse_args(): From 7e1b7d8a96bbf2bc4655a528ef3b17a7820e313f Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 8 Aug 2013 14:17:11 -0700 Subject: [PATCH 031/136] Adding web UI port --- ec2/spark_ec2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 54ab024b13..8080cc4172 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -218,6 +218,7 @@ def launch_cluster(conn, opts, cluster_name): master_group.authorize(src_group=slave_group) master_group.authorize('tcp', 22, 22, '0.0.0.0/0') master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0') + master_group.authorize('tcp', 33000, 33000, '0.0.0.0/0') master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0') master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0') master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0') From 705c9ace2a893168aadfca7d80749f3597d9a24a Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 12 Aug 2013 22:08:36 -0700 Subject: [PATCH 032/136] Use less instances of the random class during ALS setup --- .../spark/mllib/recommendation/ALS.scala | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala index 6c71dc1f32..974046d260 100644 --- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala @@ -124,9 +124,18 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l val (productInLinks, productOutLinks) = makeLinkRDDs(numBlocks, ratingsByProductBlock) // Initialize user and product factors randomly - val seed = new Random().nextInt() - var users = userOutLinks.mapValues(_.elementIds.map(u => randomFactor(rank, seed ^ u))) - var products = productOutLinks.mapValues(_.elementIds.map(p => randomFactor(rank, seed ^ ~p))) + var users = userOutLinks.mapPartitions(itr => { + val rand = new Random() + itr.map({case (x,y) => + (x,y.elementIds.map(u => randomFactor(rank, rand))) + }) + }) + var products = productOutLinks.mapPartitions(itr => { + val rand = new Random() + itr.map({case (x,y) => + (x,y.elementIds.map(u => randomFactor(rank, rand))) + }) + }) for (iter <- 0 until iterations) { // perform ALS update @@ -213,11 +222,9 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l } /** - * Make a random factor vector with the given seed. - * TODO: Initialize things using mapPartitionsWithIndex to make it faster? + * Make a random factor vector with the given random. 
*/ - private def randomFactor(rank: Int, seed: Int): Array[Double] = { - val rand = new Random(seed) + private def randomFactor(rank: Int, rand: Random): Array[Double] = { Array.fill(rank)(rand.nextDouble) } From d145da818ee14c01ff08b29a546fb3c87016aa51 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 12 Aug 2013 22:13:08 -0700 Subject: [PATCH 033/136] Code review feedback :) --- .../scala/spark/mllib/recommendation/ALS.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala index 974046d260..aeacbca510 100644 --- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala @@ -124,16 +124,16 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l val (productInLinks, productOutLinks) = makeLinkRDDs(numBlocks, ratingsByProductBlock) // Initialize user and product factors randomly - var users = userOutLinks.mapPartitions(itr => { + var users = userOutLinks.mapPartitions {itr => val rand = new Random() - itr.map({case (x,y) => - (x,y.elementIds.map(u => randomFactor(rank, rand))) + itr.map({case (x, y) => + (x, y.elementIds.map(u => randomFactor(rank, rand))) }) - }) - var products = productOutLinks.mapPartitions(itr => { + } + var products = productOutLinks.mapPartitions {itr => val rand = new Random() - itr.map({case (x,y) => - (x,y.elementIds.map(u => randomFactor(rank, rand))) + itr.map({case (x, y) => + (x, y.elementIds.map(u => randomFactor(rank, rand))) }) }) From b877e20a339872f9a29a35272e6c1f280ac901d5 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 17 Jul 2013 14:53:37 -0700 Subject: [PATCH 034/136] move yarn to its own directory --- .../main}/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala | 0 .../scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala | 0 .../src/main}/scala/spark/deploy/SparkHadoopUtil.scala | 0 .../src/main}/scala/spark/deploy/yarn/ApplicationMaster.scala | 0 .../scala/spark/deploy/yarn/ApplicationMasterArguments.scala | 0 .../src/main}/scala/spark/deploy/yarn/Client.scala | 0 .../src/main}/scala/spark/deploy/yarn/ClientArguments.scala | 0 .../src/main}/scala/spark/deploy/yarn/WorkerRunnable.scala | 0 .../src/main}/scala/spark/deploy/yarn/YarnAllocationHandler.scala | 0 .../scala/spark/scheduler/cluster/YarnClusterScheduler.scala | 0 10 files changed, 0 insertions(+), 0 deletions(-) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/spark/deploy/SparkHadoopUtil.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/spark/deploy/yarn/ApplicationMaster.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/spark/deploy/yarn/ApplicationMasterArguments.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/spark/deploy/yarn/Client.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/spark/deploy/yarn/ClientArguments.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/spark/deploy/yarn/WorkerRunnable.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/spark/deploy/yarn/YarnAllocationHandler.scala (100%) rename {core/src/hadoop2-yarn => yarn/src/main}/scala/spark/scheduler/cluster/YarnClusterScheduler.scala 
(100%) diff --git a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/yarn/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala rename to yarn/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala diff --git a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/yarn/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala rename to yarn/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala b/yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala rename to yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMaster.scala rename to yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMasterArguments.scala b/yarn/src/main/scala/spark/deploy/yarn/ApplicationMasterArguments.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ApplicationMasterArguments.scala rename to yarn/src/main/scala/spark/deploy/yarn/ApplicationMasterArguments.scala diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/spark/deploy/yarn/Client.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala rename to yarn/src/main/scala/spark/deploy/yarn/Client.scala diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/ClientArguments.scala b/yarn/src/main/scala/spark/deploy/yarn/ClientArguments.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/spark/deploy/yarn/ClientArguments.scala rename to yarn/src/main/scala/spark/deploy/yarn/ClientArguments.scala diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/WorkerRunnable.scala b/yarn/src/main/scala/spark/deploy/yarn/WorkerRunnable.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/spark/deploy/yarn/WorkerRunnable.scala rename to yarn/src/main/scala/spark/deploy/yarn/WorkerRunnable.scala diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/src/main/scala/spark/deploy/yarn/YarnAllocationHandler.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/spark/deploy/yarn/YarnAllocationHandler.scala rename to yarn/src/main/scala/spark/deploy/yarn/YarnAllocationHandler.scala diff --git a/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala b/yarn/src/main/scala/spark/scheduler/cluster/YarnClusterScheduler.scala similarity index 100% rename from core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala rename to yarn/src/main/scala/spark/scheduler/cluster/YarnClusterScheduler.scala From f67b94ad4fc8c9e7a71dd7f65d617743947ae91c Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 17 Jul 2013 17:31:26 -0700 Subject: [PATCH 035/136] remove core/src/hadoop{1,2} dirs --- .../hadoop/mapred/HadoopMapRedUtil.scala | 27 ----------- 
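Returning briefly to the ALS initialization change a few patches back: the point of the mapPartitions rewrite is to construct one Random per partition and reuse it for every element, instead of seeding a fresh Random per factor vector. A minimal standalone sketch of that pattern, with an arbitrary rank of 10 and partition count of 4 (the names and numbers here are illustrative, not taken from MLlib):

    import java.util.Random
    import spark.SparkContext

    object PerPartitionRandomSketch {
      def main(args: Array[String]) {
        val sc = new SparkContext("local[2]", "Per-partition RNG sketch")
        val ids = sc.parallelize(1 to 1000, 4)
        // One Random per partition; every element in that partition reuses it.
        val factors = ids.mapPartitions { iter =>
          val rand = new Random()
          iter.map(id => (id, Array.fill(10)(rand.nextDouble)))
        }
        println("Generated " + factors.count() + " random factor vectors")
      }
    }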
.../mapreduce/HadoopMapReduceUtil.scala | 30 ------------ .../scala/spark/deploy/SparkHadoopUtil.scala | 47 ------------------- .../hadoop/mapred/HadoopMapRedUtil.scala | 0 .../mapreduce/HadoopMapReduceUtil.scala | 0 .../scala/spark/deploy/SparkHadoopUtil.scala | 0 6 files changed, 104 deletions(-) delete mode 100644 core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala delete mode 100644 core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala delete mode 100644 core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala rename core/src/{hadoop1 => main}/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala (100%) rename core/src/{hadoop1 => main}/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala (100%) rename core/src/{hadoop1 => main}/scala/spark/deploy/SparkHadoopUtil.scala (100%) diff --git a/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala deleted file mode 100644 index 4b3d84670c..0000000000 --- a/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.mapred - -trait HadoopMapRedUtil { - def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) - - def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) - - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, - jobId, isMap, taskId, attemptId) -} diff --git a/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala deleted file mode 100644 index aa3b1ed3a5..0000000000 --- a/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.mapreduce - -import org.apache.hadoop.conf.Configuration -import task.{TaskAttemptContextImpl, JobContextImpl} - -trait HadoopMapReduceUtil { - def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) - - def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) - - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, - jobId, isMap, taskId, attemptId) -} diff --git a/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala deleted file mode 100644 index 617954cb98..0000000000 --- a/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.deploy -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.mapred.JobConf - - -/** - * Contains util methods to interact with Hadoop from spark. - */ -object SparkHadoopUtil { - - def getUserNameFromEnvironment(): String = { - // defaulting to -D ... - System.getProperty("user.name") - } - - def runAsUser(func: (Product) => Unit, args: Product) { - - // Add support, if exists - for now, simply run func ! - func(args) - } - - // Return an appropriate (subclass) of Configuration. 
Creating config can initializes some hadoop subsystems - def newConfiguration(): Configuration = new Configuration() - - // add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster - def addCredentials(conf: JobConf) {} - - def isYarnMode(): Boolean = { false } - -} diff --git a/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala similarity index 100% rename from core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala rename to core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala diff --git a/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala similarity index 100% rename from core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala rename to core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala diff --git a/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala similarity index 100% rename from core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala rename to core/src/main/scala/spark/deploy/SparkHadoopUtil.scala From 69c3bbf688cdd21171413d415cfc6d6cb8e77bd5 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 17 Jul 2013 17:33:38 -0700 Subject: [PATCH 036/136] dynamically detect hadoop version --- .../hadoop/mapred/HadoopMapRedUtil.scala | 26 +++++++++++--- .../mapreduce/HadoopMapReduceUtil.scala | 30 +++++++++++++--- project/SparkBuild.scala | 35 ++----------------- 3 files changed, 51 insertions(+), 40 deletions(-) diff --git a/core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala index 25386b2796..6cfafd3760 100644 --- a/core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ b/core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala @@ -18,10 +18,28 @@ package org.apache.hadoop.mapred trait HadoopMapRedUtil { - def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContext(conf, jobId) + def newJobContext(conf: JobConf, jobId: JobID): JobContext = { + val klass = firstAvailableClass("org.apache.hadoop.mapred.JobContextImpl", "org.apache.hadoop.mapred.JobContext"); + val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[org.apache.hadoop.mapreduce.JobID]) + ctor.newInstance(conf, jobId).asInstanceOf[JobContext] + } - def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContext(conf, attemptId) + def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = { + val klass = firstAvailableClass("org.apache.hadoop.mapred.TaskAttemptContextImpl", "org.apache.hadoop.mapred.TaskAttemptContext") + val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[TaskAttemptID]) + ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] + } - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, - jobId, isMap, taskId, attemptId) + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = { + new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, attemptId) + } + + private def firstAvailableClass(first: String, second: String): Class[_] = { + try { + Class.forName(first) + } catch { + case e: 
ClassNotFoundException => + Class.forName(second) + } + } } diff --git a/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala index b1002e0cac..0f77828dc8 100644 --- a/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ b/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala @@ -20,10 +20,32 @@ package org.apache.hadoop.mapreduce import org.apache.hadoop.conf.Configuration trait HadoopMapReduceUtil { - def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContext(conf, jobId) + def newJobContext(conf: Configuration, jobId: JobID): JobContext = { + val klass = firstAvailableClass( + "org.apache.hadoop.mapreduce.task.JobContextImpl", + "org.apache.hadoop.mapreduce.JobContext") + val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID]) + ctor.newInstance(conf, jobId).asInstanceOf[JobContext] + } - def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContext(conf, attemptId) + def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = { + val klass = firstAvailableClass( + "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", + "org.apache.hadoop.mapreduce.TaskAttemptContext") + val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID]) + ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] + } - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, - jobId, isMap, taskId, attemptId) + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = { + new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, attemptId) + } + + private def firstAvailableClass(first: String, second: String): Class[_] = { + try { + Class.forName(first) + } catch { + case e: ClassNotFoundException => + Class.forName(second) + } + } } diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f6519c8287..a06550bb97 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -27,13 +27,8 @@ object SparkBuild extends Build { // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. val HADOOP_VERSION = "1.0.4" - val HADOOP_MAJOR_VERSION = "1" - val HADOOP_YARN = false - - // For Hadoop 2 versions such as "2.0.0-mr1-cdh4.1.1", set the HADOOP_MAJOR_VERSION to "2" //val HADOOP_VERSION = "2.0.0-mr1-cdh4.1.1" - //val HADOOP_MAJOR_VERSION = "2" - //val HADOOP_YARN = false + val HADOOP_YARN = false // For Hadoop 2 YARN support //val HADOOP_VERSION = "2.0.2-alpha" @@ -184,37 +179,13 @@ object SparkBuild extends Build { "org.apache.mesos" % "mesos" % "0.12.1", "io.netty" % "netty-all" % "4.0.0.Beta2", "org.apache.derby" % "derby" % "10.4.2.0" % "test", + "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION, "com.codahale.metrics" % "metrics-core" % "3.0.0", "com.codahale.metrics" % "metrics-jvm" % "3.0.0", "com.codahale.metrics" % "metrics-json" % "3.0.0", "com.twitter" % "chill_2.9.3" % "0.3.1", "com.twitter" % "chill-java" % "0.3.1" - ) ++ ( - if (HADOOP_MAJOR_VERSION == "2") { - if (HADOOP_YARN) { - Seq( - // Exclude rule required for all ? 
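Stepping out of the build-file hunk for a moment, the Hadoop version detection this patch introduces boils down to "try the Hadoop 2 class name, then fall back to the Hadoop 1 name". A generic sketch of that fallback, assuming only ClassNotFoundException signals a missing class (the helper and object names are illustrative; the two class names in main are copied from the diff above):

    object ClassFallbackSketch {
      def firstAvailableClass(candidates: String*): Class[_] = {
        for (name <- candidates) {
          try {
            return Class.forName(name)
          } catch {
            case _: ClassNotFoundException => // try the next candidate
          }
        }
        throw new ClassNotFoundException("none of: " + candidates.mkString(", "))
      }

      def main(args: Array[String]) {
        // Which name resolves depends on the Hadoop version on the classpath.
        val klass = firstAvailableClass(
          "org.apache.hadoop.mapred.JobContextImpl",
          "org.apache.hadoop.mapred.JobContext")
        println("Resolved to: " + klass.getName)
      }
    }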
- "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty, excludeAsm) - ) - } else { - Seq( - "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty, excludeAsm) - ) - } - } else { - Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) - }), - unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / - ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") { - "src/hadoop2-yarn/scala" - } else { - "src/hadoop" + HADOOP_MAJOR_VERSION + "/scala" - } ) - } + ) ) ++ assemblySettings ++ extraAssemblySettings def rootSettings = sharedSettings ++ Seq( From 273b499b9ac8373f0f92ebf8e4141fe51cec4a33 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 18 Jul 2013 13:36:34 -0700 Subject: [PATCH 037/136] yarn sbt --- project/SparkBuild.scala | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index a06550bb97..99351ca935 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -24,21 +24,10 @@ import AssemblyKeys._ //import com.jsuereth.pgp.sbtplugin.PgpKeys._ object SparkBuild extends Build { - // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or - // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. - val HADOOP_VERSION = "1.0.4" - //val HADOOP_VERSION = "2.0.0-mr1-cdh4.1.1" - val HADOOP_YARN = false - - // For Hadoop 2 YARN support - //val HADOOP_VERSION = "2.0.2-alpha" - //val HADOOP_MAJOR_VERSION = "2" - //val HADOOP_YARN = true - // HBase version; set as appropriate. 
val HBASE_VERSION = "0.94.6" - lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib, tools) + lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib, tools, yarn) lazy val core = Project("core", file("core"), settings = coreSettings) @@ -54,6 +43,8 @@ object SparkBuild extends Build { lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn (core) + lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn (core) + // A configuration to set an alternative publishLocalConfiguration lazy val MavenCompile = config("m2r") extend(Compile) lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") @@ -179,7 +170,7 @@ object SparkBuild extends Build { "org.apache.mesos" % "mesos" % "0.12.1", "io.netty" % "netty-all" % "4.0.0.Beta2", "org.apache.derby" % "derby" % "10.4.2.0" % "test", - "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION, + "org.apache.hadoop" % "hadoop-client" % "1.0.4", "com.codahale.metrics" % "metrics-core" % "3.0.0", "com.codahale.metrics" % "metrics-jvm" % "3.0.0", "com.codahale.metrics" % "metrics-json" % "3.0.0", @@ -244,6 +235,17 @@ object SparkBuild extends Build { ) ) ++ assemblySettings ++ extraAssemblySettings + def yarnSettings = sharedSettings ++ Seq( + name := "spark-yarn", + libraryDependencies ++= Seq( + // Exclude rule required for all ? + "org.apache.hadoop" % "hadoop-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-yarn-api" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-yarn-common" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-yarn-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty) + ) + ) + def extraAssemblySettings() = Seq(test in assembly := {}) ++ Seq( mergeStrategy in assembly := { case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard From 5d0785b4e5bb681770675a1729f1742a8cd3b491 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Sat, 20 Jul 2013 11:34:13 -0700 Subject: [PATCH 038/136] remove hadoop-yarn's org/apache/... --- .../hadoop/mapred/HadoopMapRedUtil.scala | 30 ------------------- .../mapreduce/HadoopMapReduceUtil.scala | 30 ------------------- 2 files changed, 60 deletions(-) delete mode 100644 yarn/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala delete mode 100644 yarn/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala diff --git a/yarn/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/yarn/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala deleted file mode 100644 index 0f972b7a0b..0000000000 --- a/yarn/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ /dev/null @@ -1,30 +0,0 @@ - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.mapred - -import org.apache.hadoop.mapreduce.TaskType - -trait HadoopMapRedUtil { - def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) - - def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) - - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = - new TaskAttemptID(jtIdentifier, jobId, if (isMap) TaskType.MAP else TaskType.REDUCE, taskId, attemptId) -} diff --git a/yarn/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/yarn/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala deleted file mode 100644 index 1a7cdf4788..0000000000 --- a/yarn/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.mapreduce - -import org.apache.hadoop.conf.Configuration -import task.{TaskAttemptContextImpl, JobContextImpl} - -trait HadoopMapReduceUtil { - def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) - - def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) - - def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = - new TaskAttemptID(jtIdentifier, jobId, if (isMap) TaskType.MAP else TaskType.REDUCE, taskId, attemptId) -} From 8b1c1520fc26132a21062ebb063dea25e9b36b8b Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Sat, 20 Jul 2013 11:34:18 -0700 Subject: [PATCH 039/136] add comment --- .../org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala index 0f77828dc8..dd624d8890 100644 --- a/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ b/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala @@ -22,16 +22,16 @@ import org.apache.hadoop.conf.Configuration trait HadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = { val klass = firstAvailableClass( - "org.apache.hadoop.mapreduce.task.JobContextImpl", - "org.apache.hadoop.mapreduce.JobContext") + "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn + "org.apache.hadoop.mapreduce.JobContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID]) ctor.newInstance(conf, jobId).asInstanceOf[JobContext] } def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = { val klass = firstAvailableClass( - "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", - "org.apache.hadoop.mapreduce.TaskAttemptContext") + "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", // hadoop2, hadoop2-yarn + "org.apache.hadoop.mapreduce.TaskAttemptContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID]) ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] } From cb4ef19214332b5e9c2e0d0bfa0a72262122d04e Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Mon, 22 Jul 2013 14:03:31 -0700 Subject: [PATCH 040/136] yarn support --- bin/compute-classpath.cmd | 2 ++ bin/compute-classpath.sh | 2 ++ project/SparkBuild.scala | 12 ++++++------ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd index eb836b0ffd..9178b852e6 100644 --- a/bin/compute-classpath.cmd +++ b/bin/compute-classpath.cmd @@ -34,6 +34,7 @@ set EXAMPLES_DIR=%FWDIR%examples set BAGEL_DIR=%FWDIR%bagel set MLLIB_DIR=%FWDIR%mllib set TOOLS_DIR=%FWDIR%tools +set YARN_DIR=%FWDIR%yarn set STREAMING_DIR=%FWDIR%streaming set PYSPARK_DIR=%FWDIR%python @@ -50,6 +51,7 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\* set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes set CLASSPATH=%CLASSPATH%;%TOOLS_DIR%\target\scala-%SCALA_VERSION%\classes +set CLASSPATH=%CLASSPATH%;%YARN_DIR%\target\scala-%SCALA_VERSION%\classes rem Add hadoop conf dir - else FileSystem.*, etc fail rem Note, this assumes 
that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index e4ce1ca848..db6fc866ab 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -37,6 +37,7 @@ EXAMPLES_DIR="$FWDIR/examples" BAGEL_DIR="$FWDIR/bagel" MLLIB_DIR="$FWDIR/mllib" TOOLS_DIR="$FWDIR/tools" +YARN_DIR="$FWDIR/yarn" STREAMING_DIR="$FWDIR/streaming" PYSPARK_DIR="$FWDIR/python" @@ -72,6 +73,7 @@ function dev_classpath { CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$TOOLS_DIR/target/scala-$SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$YARN_DIR/target/scala-$SCALA_VERSION/classes" for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do CLASSPATH="$CLASSPATH:$jar" done diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 99351ca935..ede49ea38b 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -170,7 +170,7 @@ object SparkBuild extends Build { "org.apache.mesos" % "mesos" % "0.12.1", "io.netty" % "netty-all" % "4.0.0.Beta2", "org.apache.derby" % "derby" % "10.4.2.0" % "test", - "org.apache.hadoop" % "hadoop-client" % "1.0.4", + "org.apache.hadoop" % "hadoop-client" % "1.0.4" excludeAll(excludeJackson, excludeNetty, excludeAsm), "com.codahale.metrics" % "metrics-core" % "3.0.0", "com.codahale.metrics" % "metrics-jvm" % "3.0.0", "com.codahale.metrics" % "metrics-json" % "3.0.0", @@ -239,12 +239,12 @@ object SparkBuild extends Build { name := "spark-yarn", libraryDependencies ++= Seq( // Exclude rule required for all ? - "org.apache.hadoop" % "hadoop-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty), - "org.apache.hadoop" % "hadoop-yarn-api" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty), - "org.apache.hadoop" % "hadoop-yarn-common" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty), - "org.apache.hadoop" % "hadoop-yarn-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty) + "org.apache.hadoop" % "hadoop-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-api" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-common" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm) ) - ) + ) ++ assemblySettings ++ extraAssemblySettings def extraAssemblySettings() = Seq(test in assembly := {}) ++ Seq( mergeStrategy in assembly := { From 43ebcb84840dc9db61e5912d9a37707c065edc5a Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Tue, 23 Jul 2013 17:37:10 -0700 Subject: [PATCH 041/136] rename HadoopMapRedUtil => SparkHadoopMapRedUtil, HadoopMapReduceUtil => SparkHadoopMapReduceUtil --- .../{HadoopMapRedUtil.scala => SparkHadoopMapRedUtil.scala} | 2 +- ...oopMapReduceUtil.scala => SparkHadoopMapReduceUtil.scala} | 2 +- core/src/main/scala/spark/HadoopWriter.scala | 2 +- core/src/main/scala/spark/PairRDDFunctions.scala | 5 +++-- core/src/main/scala/spark/rdd/NewHadoopRDD.scala | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) rename core/src/main/scala/org/apache/hadoop/mapred/{HadoopMapRedUtil.scala => SparkHadoopMapRedUtil.scala} (98%) rename core/src/main/scala/org/apache/hadoop/mapreduce/{HadoopMapReduceUtil.scala => SparkHadoopMapReduceUtil.scala} (98%) diff --git 
a/core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala similarity index 98% rename from core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala rename to core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala index 6cfafd3760..f87460039b 100644 --- a/core/src/main/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ b/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala @@ -17,7 +17,7 @@ package org.apache.hadoop.mapred -trait HadoopMapRedUtil { +trait SparkHadoopMapRedUtil { def newJobContext(conf: JobConf, jobId: JobID): JobContext = { val klass = firstAvailableClass("org.apache.hadoop.mapred.JobContextImpl", "org.apache.hadoop.mapred.JobContext"); val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[org.apache.hadoop.mapreduce.JobID]) diff --git a/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala similarity index 98% rename from core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala rename to core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala index dd624d8890..bea6253677 100644 --- a/core/src/main/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala @@ -19,7 +19,7 @@ package org.apache.hadoop.mapreduce import org.apache.hadoop.conf.Configuration -trait HadoopMapReduceUtil { +trait SparkHadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn diff --git a/core/src/main/scala/spark/HadoopWriter.scala b/core/src/main/scala/spark/HadoopWriter.scala index b1fe0075a3..60840ce77e 100644 --- a/core/src/main/scala/spark/HadoopWriter.scala +++ b/core/src/main/scala/spark/HadoopWriter.scala @@ -36,7 +36,7 @@ import spark.SerializableWritable * Saves the RDD using a JobConf, which should contain an output key class, an output value class, * a filename to write to, etc, exactly like in a Hadoop MapReduce job. 
*/ -class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRedUtil with Serializable { +class HadoopWriter(@transient jobConf: JobConf) extends Logging with SparkHadoopMapRedUtil with Serializable { private val now = new Date() private val conf = new SerializableWritable(jobConf) diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 6b0cc2fbf1..aeeac65cca 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -37,7 +37,8 @@ import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.OutputFormat import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat} -import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat, RecordWriter => NewRecordWriter, Job => NewAPIHadoopJob, HadoopMapReduceUtil} +import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat, + RecordWriter => NewRecordWriter, Job => NewAPIHadoopJob, SparkHadoopMapReduceUtil} import org.apache.hadoop.security.UserGroupInformation import spark.partial.BoundedDouble @@ -53,7 +54,7 @@ import spark.Partitioner._ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( self: RDD[(K, V)]) extends Logging - with HadoopMapReduceUtil + with SparkHadoopMapReduceUtil with Serializable { /** diff --git a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/spark/rdd/NewHadoopRDD.scala index 0b71608169..184685528e 100644 --- a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/spark/rdd/NewHadoopRDD.scala @@ -43,7 +43,7 @@ class NewHadoopRDD[K, V]( valueClass: Class[V], @transient conf: Configuration) extends RDD[(K, V)](sc, Nil) - with HadoopMapReduceUtil + with SparkHadoopMapReduceUtil with Logging { // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it From 4f43fd791ab0e84693e2337358c6b880a1593e54 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 24 Jul 2013 12:41:40 -0700 Subject: [PATCH 042/136] make SparkHadoopUtil a member of SparkEnv --- core/src/main/scala/spark/SparkContext.scala | 11 +++++++---- core/src/main/scala/spark/SparkEnv.scala | 2 ++ core/src/main/scala/spark/Utils.scala | 7 ++----- .../main/scala/spark/deploy/SparkHadoopUtil.scala | 2 +- .../spark/executor/StandaloneExecutorBackend.scala | 6 +++--- core/src/main/scala/spark/rdd/CheckpointRDD.scala | 14 ++++++++------ core/src/main/scala/spark/rdd/HadoopRDD.scala | 6 +++--- .../scala/spark/scheduler/InputFormatInfo.scala | 9 +++++---- .../main/scala/spark/examples/SparkHdfsLR.scala | 3 +-- 9 files changed, 32 insertions(+), 28 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 80c65dfebd..f020b2554b 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -58,7 +58,7 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.mesos.MesosNativeLibrary -import spark.deploy.{LocalSparkCluster, SparkHadoopUtil} +import spark.deploy.LocalSparkCluster import spark.partial.{ApproximateEvaluator, PartialResult} import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD} import spark.scheduler.{DAGScheduler, DAGSchedulerSource, ResultTask, ShuffleMapTask, SparkListener, @@ -241,7 +241,8 @@ class SparkContext( /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. 
*/ val hadoopConfiguration = { - val conf = SparkHadoopUtil.newConfiguration() + val env = SparkEnv.get + val conf = env.hadoop.newConfiguration() // Explicitly check for S3 environment variables if (System.getenv("AWS_ACCESS_KEY_ID") != null && System.getenv("AWS_SECRET_ACCESS_KEY") != null) { conf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) @@ -629,10 +630,11 @@ class SparkContext( logWarning("null specified as parameter to addJar", new SparkException("null specified as parameter to addJar")) } else { + val env = SparkEnv.get val uri = new URI(path) val key = uri.getScheme match { case null | "file" => - if (SparkHadoopUtil.isYarnMode()) { + if (env.hadoop.isYarnMode()) { logWarning("local jar specified as parameter to addJar under Yarn mode") return } @@ -815,8 +817,9 @@ class SparkContext( * prevent accidental overriding of checkpoint files in the existing directory. */ def setCheckpointDir(dir: String, useExisting: Boolean = false) { + val env = SparkEnv.get val path = new Path(dir) - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = path.getFileSystem(env.hadoop.newConfiguration()) if (!useExisting) { if (fs.exists(path)) { throw new Exception("Checkpoint directory '" + path + "' already exists.") diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala index 0adbf1d96e..73990f0423 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/spark/SparkEnv.scala @@ -25,6 +25,7 @@ import akka.remote.RemoteActorRefProvider import spark.broadcast.BroadcastManager import spark.metrics.MetricsSystem +import spark.deploy.SparkHadoopUtil import spark.storage.BlockManager import spark.storage.BlockManagerMaster import spark.network.ConnectionManager @@ -60,6 +61,7 @@ class SparkEnv ( // If executorId is NOT found, return defaultHostPort var executorIdToHostPort: Option[(String, String) => String]) { + val hadoop = new SparkHadoopUtil private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]() def stop() { diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index 673f9a810d..7ea9b0c28a 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -266,8 +266,9 @@ private object Utils extends Logging { } case _ => // Use the Hadoop filesystem library, which supports file://, hdfs://, s3://, and others + val env = SparkEnv.get val uri = new URI(url) - val conf = SparkHadoopUtil.newConfiguration() + val conf = env.hadoop.newConfiguration() val fs = FileSystem.get(uri, conf) val in = fs.open(new Path(uri)) val out = new FileOutputStream(tempFile) @@ -433,10 +434,6 @@ private object Utils extends Logging { try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } } } - def getUserNameFromEnvironment(): String = { - SparkHadoopUtil.getUserNameFromEnvironment - } - // Typically, this will be of order of number of nodes in cluster // If not, we should change it to LRUCache or something. private val hostPortParseResults = new ConcurrentHashMap[String, (String, Int)]() diff --git a/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala index 617954cb98..c4ed0bb17e 100644 --- a/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala @@ -23,7 +23,7 @@ import org.apache.hadoop.mapred.JobConf /** * Contains util methods to interact with Hadoop from spark. 
*/ -object SparkHadoopUtil { +class SparkHadoopUtil { def getUserNameFromEnvironment(): String = { // defaulting to -D ... diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala index e47fe50021..a9e06f8d54 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala @@ -22,9 +22,8 @@ import java.nio.ByteBuffer import akka.actor.{ActorRef, Actor, Props, Terminated} import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} -import spark.{Logging, Utils} +import spark.{Logging, Utils, SparkEnv} import spark.TaskState.TaskState -import spark.deploy.SparkHadoopUtil import spark.scheduler.cluster.StandaloneClusterMessages._ import spark.util.AkkaUtils @@ -82,7 +81,8 @@ private[spark] class StandaloneExecutorBackend( private[spark] object StandaloneExecutorBackend { def run(driverUrl: String, executorId: String, hostname: String, cores: Int) { - SparkHadoopUtil.runAsUser(run0, Tuple4[Any, Any, Any, Any] (driverUrl, executorId, hostname, cores)) + val env = SparkEnv.get + env.hadoop.runAsUser(run0, Tuple4[Any, Any, Any, Any] (driverUrl, executorId, hostname, cores)) } // This will be run 'as' the user diff --git a/core/src/main/scala/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/spark/rdd/CheckpointRDD.scala index 6794e0e201..1ad5fe6539 100644 --- a/core/src/main/scala/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/spark/rdd/CheckpointRDD.scala @@ -25,7 +25,6 @@ import org.apache.hadoop.util.ReflectionUtils import org.apache.hadoop.fs.Path import java.io.{File, IOException, EOFException} import java.text.NumberFormat -import spark.deploy.SparkHadoopUtil private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {} @@ -82,8 +81,9 @@ private[spark] object CheckpointRDD extends Logging { } def writeToFile[T](path: String, blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]) { + val env = SparkEnv.get val outputDir = new Path(path) - val fs = outputDir.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = outputDir.getFileSystem(env.hadoop.newConfiguration()) val finalOutputName = splitIdToFile(ctx.splitId) val finalOutputPath = new Path(outputDir, finalOutputName) @@ -101,7 +101,7 @@ private[spark] object CheckpointRDD extends Logging { // This is mainly for testing purpose fs.create(tempOutputPath, false, bufferSize, fs.getDefaultReplication, blockSize) } - val serializer = SparkEnv.get.serializer.newInstance() + val serializer = env.serializer.newInstance() val serializeStream = serializer.serializeStream(fileOutputStream) serializeStream.writeAll(iterator) serializeStream.close() @@ -121,10 +121,11 @@ private[spark] object CheckpointRDD extends Logging { } def readFromFile[T](path: Path, context: TaskContext): Iterator[T] = { - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val env = SparkEnv.get + val fs = path.getFileSystem(env.hadoop.newConfiguration()) val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt val fileInputStream = fs.open(path, bufferSize) - val serializer = SparkEnv.get.serializer.newInstance() + val serializer = env.serializer.newInstance() val deserializeStream = serializer.deserializeStream(fileInputStream) // Register an on-task-completion callback to close the input stream. 
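The common thread in this patch's changes, above and below, is that Hadoop helpers are now reached through the per-process SparkEnv instead of a global SparkHadoopUtil object. A minimal sketch of the new call shape; SparkEnv.get, env.hadoop.newConfiguration and Path.getFileSystem are taken from the diffs, while the wrapper object and method are illustrative:

    import org.apache.hadoop.fs.{FileSystem, Path}
    import spark.SparkEnv

    object HadoopViaSparkEnvSketch {
      // Must run where a SparkEnv exists: on the driver after the SparkContext is
      // created, or inside a task on an executor.
      def fileSystemFor(pathStr: String): FileSystem = {
        val env = SparkEnv.get
        val conf = env.hadoop.newConfiguration()   // was SparkHadoopUtil.newConfiguration()
        new Path(pathStr).getFileSystem(conf)
      }
    }

A later patch in this series (044) makes SparkEnv pick a YARN-specific subclass for hadoop via reflection when SPARK_YARN_MODE is set, so call sites written in this shape do not need to change.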
@@ -140,10 +141,11 @@ private[spark] object CheckpointRDD extends Logging { import spark._ val Array(cluster, hdfsPath) = args + val env = SparkEnv.get val sc = new SparkContext(cluster, "CheckpointRDD Test") val rdd = sc.makeRDD(1 to 10, 10).flatMap(x => 1 to 10000) val path = new Path(hdfsPath, "temp") - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = path.getFileSystem(env.hadoop.newConfiguration()) sc.runJob(rdd, CheckpointRDD.writeToFile(path.toString, 1024) _) val cpRDD = new CheckpointRDD[Int](sc, path.toString) assert(cpRDD.partitions.length == rdd.partitions.length, "Number of partitions is not the same") diff --git a/core/src/main/scala/spark/rdd/HadoopRDD.scala b/core/src/main/scala/spark/rdd/HadoopRDD.scala index fd00d59c77..6c41b97780 100644 --- a/core/src/main/scala/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/spark/rdd/HadoopRDD.scala @@ -32,8 +32,7 @@ import org.apache.hadoop.mapred.RecordReader import org.apache.hadoop.mapred.Reporter import org.apache.hadoop.util.ReflectionUtils -import spark.deploy.SparkHadoopUtil -import spark.{Dependency, Logging, Partition, RDD, SerializableWritable, SparkContext, TaskContext} +import spark.{Dependency, Logging, Partition, RDD, SerializableWritable, SparkContext, SparkEnv, TaskContext} import spark.util.NextIterator import org.apache.hadoop.conf.Configurable @@ -68,7 +67,8 @@ class HadoopRDD[K, V]( private val confBroadcast = sc.broadcast(new SerializableWritable(conf)) override def getPartitions: Array[Partition] = { - SparkHadoopUtil.addCredentials(conf); + val env = SparkEnv.get + env.hadoop.addCredentials(conf) val inputFormat = createInputFormat(conf) if (inputFormat.isInstanceOf[Configurable]) { inputFormat.asInstanceOf[Configurable].setConf(conf) diff --git a/core/src/main/scala/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/spark/scheduler/InputFormatInfo.scala index 65f8c3200e..8f1b9b29b5 100644 --- a/core/src/main/scala/spark/scheduler/InputFormatInfo.scala +++ b/core/src/main/scala/spark/scheduler/InputFormatInfo.scala @@ -17,7 +17,7 @@ package spark.scheduler -import spark.Logging +import spark.{Logging, SparkEnv} import scala.collection.immutable.Set import org.apache.hadoop.mapred.{FileInputFormat, JobConf} import org.apache.hadoop.security.UserGroupInformation @@ -26,7 +26,6 @@ import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.conf.Configuration import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.collection.JavaConversions._ -import spark.deploy.SparkHadoopUtil /** @@ -88,8 +87,9 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl // This method does not expect failures, since validate has already passed ... private def prefLocsFromMapreduceInputFormat(): Set[SplitInfo] = { + val env = SparkEnv.get val conf = new JobConf(configuration) - SparkHadoopUtil.addCredentials(conf); + env.hadoop.addCredentials(conf) FileInputFormat.setInputPaths(conf, path) val instance: org.apache.hadoop.mapreduce.InputFormat[_, _] = @@ -108,8 +108,9 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl // This method does not expect failures, since validate has already passed ... 
private def prefLocsFromMapredInputFormat(): Set[SplitInfo] = { + val env = SparkEnv.get val jobConf = new JobConf(configuration) - SparkHadoopUtil.addCredentials(jobConf); + env.hadoop.addCredentials(jobConf) FileInputFormat.setInputPaths(jobConf, path) val instance: org.apache.hadoop.mapred.InputFormat[_, _] = diff --git a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/spark/examples/SparkHdfsLR.scala index ef6e09a8e8..43c9115664 100644 --- a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala +++ b/examples/src/main/scala/spark/examples/SparkHdfsLR.scala @@ -21,7 +21,6 @@ import java.util.Random import scala.math.exp import spark.util.Vector import spark._ -import spark.deploy.SparkHadoopUtil import spark.scheduler.InputFormatInfo /** @@ -52,7 +51,7 @@ object SparkHdfsLR { System.exit(1) } val inputPath = args(1) - val conf = SparkHadoopUtil.newConfiguration() + val conf = SparkEnv.get.hadoop.newConfiguration() val sc = new SparkContext(args(0), "SparkHdfsLR", System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")), Map(), InputFormatInfo.computePreferredLocations( From bd0bab47c9602462628b1d3c90d5eb5d889f4596 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 24 Jul 2013 13:07:27 -0700 Subject: [PATCH 043/136] SparkEnv isn't available this early, and not needed anyway --- .../scala/spark/deploy/SparkHadoopUtil.scala | 11 ----------- .../executor/StandaloneExecutorBackend.scala | 14 -------------- .../scala/spark/deploy/SparkHadoopUtil.scala | 16 ---------------- 3 files changed, 41 deletions(-) diff --git a/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala index c4ed0bb17e..882161e669 100644 --- a/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/spark/deploy/SparkHadoopUtil.scala @@ -25,17 +25,6 @@ import org.apache.hadoop.mapred.JobConf */ class SparkHadoopUtil { - def getUserNameFromEnvironment(): String = { - // defaulting to -D ... - System.getProperty("user.name") - } - - def runAsUser(func: (Product) => Unit, args: Product) { - - // Add support, if exists - for now, simply run func ! - func(args) - } - // Return an appropriate (subclass) of Configuration. 
Creating config can initializes some hadoop subsystems def newConfiguration(): Configuration = new Configuration() diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala index a9e06f8d54..b5fb6dbe29 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala @@ -81,20 +81,6 @@ private[spark] class StandaloneExecutorBackend( private[spark] object StandaloneExecutorBackend { def run(driverUrl: String, executorId: String, hostname: String, cores: Int) { - val env = SparkEnv.get - env.hadoop.runAsUser(run0, Tuple4[Any, Any, Any, Any] (driverUrl, executorId, hostname, cores)) - } - - // This will be run 'as' the user - def run0(args: Product) { - assert(4 == args.productArity) - runImpl(args.productElement(0).asInstanceOf[String], - args.productElement(1).asInstanceOf[String], - args.productElement(2).asInstanceOf[String], - args.productElement(3).asInstanceOf[Int]) - } - - private def runImpl(driverUrl: String, executorId: String, hostname: String, cores: Int) { // Debug code Utils.checkHost(hostname) diff --git a/yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala b/yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala index 6122fdced0..a812bcf867 100644 --- a/yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala +++ b/yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala @@ -32,22 +32,6 @@ object SparkHadoopUtil { val yarnConf = newConfiguration() - def getUserNameFromEnvironment(): String = { - // defaulting to env if -D is not present ... - val retval = System.getProperty(Environment.USER.name, System.getenv(Environment.USER.name)) - - // If nothing found, default to user we are running as - if (retval == null) System.getProperty("user.name") else retval - } - - def runAsUser(func: (Product) => Unit, args: Product) { - runAsUser(func, args, getUserNameFromEnvironment()) - } - - def runAsUser(func: (Product) => Unit, args: Product, user: String) { - func(args) - } - // Note that all params which start with SPARK are propagated all the way through, so if in yarn mode, this MUST be set to true. 
def isYarnMode(): Boolean = { val yarnMode = System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE")) From e2d7656ca3d561f0a6fc8dd81ef46e4aa6ba608e Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 24 Jul 2013 14:01:48 -0700 Subject: [PATCH 044/136] re-enable YARN support --- core/src/main/scala/spark/SparkEnv.scala | 14 +++++++++- .../spark/deploy/yarn/ApplicationMaster.scala | 4 +-- .../main/scala/spark/deploy/yarn/Client.scala | 7 +++-- .../YarnSparkHadoopUtil.scala} | 26 +++++-------------- 4 files changed, 26 insertions(+), 25 deletions(-) rename yarn/src/main/scala/spark/deploy/{SparkHadoopUtil.scala => yarn/YarnSparkHadoopUtil.scala} (71%) diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala index 73990f0423..5f71df33b6 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/spark/SparkEnv.scala @@ -61,9 +61,21 @@ class SparkEnv ( // If executorId is NOT found, return defaultHostPort var executorIdToHostPort: Option[(String, String) => String]) { - val hadoop = new SparkHadoopUtil private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]() + val hadoop = { + val yarnMode = java.lang.Boolean.valueOf(System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE"))) + if(yarnMode) { + try { + Class.forName("spark.deploy.yarn.YarnSparkHadoopUtil").newInstance.asInstanceOf[SparkHadoopUtil] + } catch { + case th: Throwable => throw new SparkException("Unable to load YARN support", th) + } + } else { + new SparkHadoopUtil + } + } + def stop() { pythonWorkers.foreach { case(key, worker) => worker.stop() } httpFileServer.stop() diff --git a/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala index 1b06169739..d69a969d42 100644 --- a/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala @@ -130,11 +130,11 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e try { val socket = new Socket(driverHost, driverPort.toInt) socket.close() - logInfo("Master now available: " + driverHost + ":" + driverPort) + logInfo("Driver now available: " + driverHost + ":" + driverPort) driverUp = true } catch { case e: Exception => - logError("Failed to connect to driver at " + driverHost + ":" + driverPort) + logWarning("Failed to connect to driver at " + driverHost + ":" + driverPort + ", retrying") Thread.sleep(100) } } diff --git a/yarn/src/main/scala/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/spark/deploy/yarn/Client.scala index 8bcbfc2735..9d3860b863 100644 --- a/yarn/src/main/scala/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/spark/deploy/yarn/Client.scala @@ -165,7 +165,7 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl Apps.addToEnvironment(env, Environment.CLASSPATH.name, "./*") Apps.addToEnvironment(env, Environment.CLASSPATH.name, "$CLASSPATH") Client.populateHadoopClasspath(yarnConf, env) - SparkHadoopUtil.setYarnMode(env) + env("SPARK_YARN_MODE") = "true" env("SPARK_YARN_JAR_PATH") = localResources("spark.jar").getResource().getScheme.toString() + "://" + localResources("spark.jar").getResource().getFile().toString() @@ -313,8 +313,11 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl object Client { def main(argStrings: Array[String]) { + // Set an env variable indicating we are running in YARN mode. 
+ // Note that anything with SPARK prefix gets propagated to all (remote) processes + System.setProperty("SPARK_YARN_MODE", "true") + val args = new ClientArguments(argStrings) - SparkHadoopUtil.setYarnMode() new Client(args).run } diff --git a/yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala b/yarn/src/main/scala/spark/deploy/yarn/YarnSparkHadoopUtil.scala similarity index 71% rename from yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala rename to yarn/src/main/scala/spark/deploy/yarn/YarnSparkHadoopUtil.scala index a812bcf867..77c4ee7f3f 100644 --- a/yarn/src/main/scala/spark/deploy/SparkHadoopUtil.scala +++ b/yarn/src/main/scala/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -15,8 +15,9 @@ * limitations under the License. */ -package spark.deploy +package spark.deploy.yarn +import spark.deploy.SparkHadoopUtil import collection.mutable.HashMap import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.security.UserGroupInformation @@ -28,32 +29,17 @@ import java.security.PrivilegedExceptionAction /** * Contains util methods to interact with Hadoop from spark. */ -object SparkHadoopUtil { - - val yarnConf = newConfiguration() +class YarnSparkHadoopUtil extends SparkHadoopUtil { // Note that all params which start with SPARK are propagated all the way through, so if in yarn mode, this MUST be set to true. - def isYarnMode(): Boolean = { - val yarnMode = System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE")) - java.lang.Boolean.valueOf(yarnMode) - } - - // Set an env variable indicating we are running in YARN mode. - // Note that anything with SPARK prefix gets propagated to all (remote) processes - def setYarnMode() { - System.setProperty("SPARK_YARN_MODE", "true") - } - - def setYarnMode(env: HashMap[String, String]) { - env("SPARK_YARN_MODE") = "true" - } + override def isYarnMode(): Boolean = { true } // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems // Always create a new config, dont reuse yarnConf. 
- def newConfiguration(): Configuration = new YarnConfiguration(new Configuration()) + override def newConfiguration(): Configuration = new YarnConfiguration(new Configuration()) // add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster - def addCredentials(conf: JobConf) { + override def addCredentials(conf: JobConf) { val jobCreds = conf.getCredentials(); jobCreds.mergeAll(UserGroupInformation.getCurrentUser().getCredentials()) } From 8bb0bd11cea8a16d21c56c1b80b31a1c5605a414 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 24 Jul 2013 14:16:50 -0700 Subject: [PATCH 045/136] YARN ApplicationMaster shouldn't wait forever --- .../main/scala/spark/deploy/yarn/ApplicationMaster.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala index d69a969d42..15dbd1c0fb 100644 --- a/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/spark/deploy/yarn/ApplicationMaster.scala @@ -124,7 +124,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e private def waitForSparkMaster() { logInfo("Waiting for spark driver to be reachable.") var driverUp = false - while(!driverUp) { + var tries = 0 + while(!driverUp && tries < 10) { val driverHost = System.getProperty("spark.driver.host") val driverPort = System.getProperty("spark.driver.port") try { @@ -136,6 +137,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e case e: Exception => logWarning("Failed to connect to driver at " + driverHost + ":" + driverPort + ", retrying") Thread.sleep(100) + tries = tries + 1 } } } @@ -176,7 +178,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e var sparkContext: SparkContext = null ApplicationMaster.sparkContextRef.synchronized { var count = 0 - while (ApplicationMaster.sparkContextRef.get() == null) { + while (ApplicationMaster.sparkContextRef.get() == null && count < 10) { logInfo("Waiting for spark context initialization ... 
" + count) count = count + 1 ApplicationMaster.sparkContextRef.wait(10000L) From 14b6bcdf93642624c42fa04aeaff9fff97f6e07f Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Mon, 29 Jul 2013 16:04:48 -0700 Subject: [PATCH 046/136] update YARN docs --- docs/running-on-yarn.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 66fb8d73e8..9c2cedfd88 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -55,7 +55,7 @@ This would be used to connect to the cluster, write to the dfs and submit jobs t The command to launch the YARN Client is as follows: - SPARK_JAR= ./run spark.deploy.yarn.Client \ + SPARK_JAR= ./run spark.deploy.yarn.Client \ --jar \ --class \ --args \ @@ -68,7 +68,7 @@ The command to launch the YARN Client is as follows: For example: - SPARK_JAR=./core/target/spark-core-assembly-{{site.SPARK_VERSION}}.jar ./run spark.deploy.yarn.Client \ + SPARK_JAR=./yarn/target/spark-yarn-assembly-{{site.SPARK_VERSION}}.jar ./run spark.deploy.yarn.Client \ --jar examples/target/scala-{{site.SCALA_VERSION}}/spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}.jar \ --class spark.examples.SparkPi \ --args yarn-standalone \ From 8f979edef5b80967b81323e13dcafd5aac92feb1 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Tue, 6 Aug 2013 15:47:49 -0700 Subject: [PATCH 047/136] Fix newTaskAttemptID to work under YARN --- .../mapreduce/SparkHadoopMapReduceUtil.scala | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala index bea6253677..93180307fa 100644 --- a/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala +++ b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala @@ -18,6 +18,7 @@ package org.apache.hadoop.mapreduce import org.apache.hadoop.conf.Configuration +import java.lang.{Integer => JInteger, Boolean => JBoolean} trait SparkHadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = { @@ -37,7 +38,24 @@ trait SparkHadoopMapReduceUtil { } def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = { - new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, attemptId) + val klass = Class.forName("org.apache.hadoop.mapreduce.TaskAttemptID"); + try { + // first, attempt to use the old-style constructor that takes a boolean isMap (not available in YARN) + val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean], + classOf[Int], classOf[Int]) + ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId), new + JInteger(attemptId)).asInstanceOf[TaskAttemptID] + } catch { + case exc: NoSuchMethodException => { + // failed, look for the new ctor that takes a TaskType (not available in 1.x) + val taskTypeClass = Class.forName("org.apache.hadoop.mapreduce.TaskType").asInstanceOf[Class[Enum[_]]] + val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke(taskTypeClass, if(isMap) "MAP" else "REDUCE") + val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass, + classOf[Int], classOf[Int]) + ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), new + JInteger(attemptId)).asInstanceOf[TaskAttemptID] + } + } } private def firstAvailableClass(first: String, second: 
String): Class[_] = { From a06a9d5c5fd6584a57292a0115253e0a8a45d490 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Tue, 6 Aug 2013 16:16:07 -0700 Subject: [PATCH 048/136] Rename HadoopWriter to SparkHadoopWriter since it's outside of our package --- core/src/main/scala/spark/PairRDDFunctions.scala | 6 +++--- .../spark/{HadoopWriter.scala => SparkHadoopWriter.scala} | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) rename core/src/main/scala/spark/{HadoopWriter.scala => SparkHadoopWriter.scala} (96%) diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index aeeac65cca..6701f24ff9 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -32,7 +32,7 @@ import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.mapred.FileOutputCommitter import org.apache.hadoop.mapred.FileOutputFormat -import org.apache.hadoop.mapred.HadoopWriter +import org.apache.hadoop.mapred.SparkHadoopWriter import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.OutputFormat @@ -653,7 +653,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( conf.set("mapred.output.compression.type", CompressionType.BLOCK.toString) } conf.setOutputCommitter(classOf[FileOutputCommitter]) - FileOutputFormat.setOutputPath(conf, HadoopWriter.createPathFromString(path, conf)) + FileOutputFormat.setOutputPath(conf, SparkHadoopWriter.createPathFromString(path, conf)) saveAsHadoopDataset(conf) } @@ -679,7 +679,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( logInfo("Saving as hadoop file of type (" + keyClass.getSimpleName+ ", " + valueClass.getSimpleName+ ")") - val writer = new HadoopWriter(conf) + val writer = new SparkHadoopWriter(conf) writer.preSetup() def writeToFile(context: TaskContext, iter: Iterator[(K,V)]) { diff --git a/core/src/main/scala/spark/HadoopWriter.scala b/core/src/main/scala/spark/SparkHadoopWriter.scala similarity index 96% rename from core/src/main/scala/spark/HadoopWriter.scala rename to core/src/main/scala/spark/SparkHadoopWriter.scala index 60840ce77e..6b330ef572 100644 --- a/core/src/main/scala/spark/HadoopWriter.scala +++ b/core/src/main/scala/spark/SparkHadoopWriter.scala @@ -36,7 +36,7 @@ import spark.SerializableWritable * Saves the RDD using a JobConf, which should contain an output key class, an output value class, * a filename to write to, etc, exactly like in a Hadoop MapReduce job. 
*/ -class HadoopWriter(@transient jobConf: JobConf) extends Logging with SparkHadoopMapRedUtil with Serializable { +class SparkHadoopWriter(@transient jobConf: JobConf) extends Logging with SparkHadoopMapRedUtil with Serializable { private val now = new Date() private val conf = new SerializableWritable(jobConf) @@ -165,7 +165,7 @@ class HadoopWriter(@transient jobConf: JobConf) extends Logging with SparkHadoop splitID = splitid attemptID = attemptid - jID = new SerializableWritable[JobID](HadoopWriter.createJobID(now, jobid)) + jID = new SerializableWritable[JobID](SparkHadoopWriter.createJobID(now, jobid)) taID = new SerializableWritable[TaskAttemptID]( new TaskAttemptID(new TaskID(jID.value, true, splitID), attemptID)) } @@ -179,7 +179,7 @@ class HadoopWriter(@transient jobConf: JobConf) extends Logging with SparkHadoop } } -object HadoopWriter { +object SparkHadoopWriter { def createJobID(time: Date, id: Int): JobID = { val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) From a0f08484636395394ac2e0d22ca0ca3e1606664c Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 14 Aug 2013 15:36:12 -0700 Subject: [PATCH 049/136] Update default version of Hadoop to 1.2.1 --- pom.xml | 2 +- project/SparkBuild.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 7e6d38df9f..135b18bd26 100644 --- a/pom.xml +++ b/pom.xml @@ -540,7 +540,7 @@ org.apache.hadoop hadoop-core - 1.0.4 + 1.2.1 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index ede49ea38b..350a36a964 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -170,7 +170,7 @@ object SparkBuild extends Build { "org.apache.mesos" % "mesos" % "0.12.1", "io.netty" % "netty-all" % "4.0.0.Beta2", "org.apache.derby" % "derby" % "10.4.2.0" % "test", - "org.apache.hadoop" % "hadoop-client" % "1.0.4" excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-client" % "1.2.1" excludeAll(excludeJackson, excludeNetty, excludeAsm), "com.codahale.metrics" % "metrics-core" % "3.0.0", "com.codahale.metrics" % "metrics-jvm" % "3.0.0", "com.codahale.metrics" % "metrics-json" % "3.0.0", From 8fc40818d714651c0fb360a26b64a3ab12559961 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 15 Aug 2013 23:08:48 -0700 Subject: [PATCH 050/136] Fix --- mllib/src/main/scala/spark/mllib/recommendation/ALS.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala index aeacbca510..9097f46db9 100644 --- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala @@ -133,9 +133,9 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l var products = productOutLinks.mapPartitions {itr => val rand = new Random() itr.map({case (x, y) => - (x, y.elementIds.map(u => randomFactor(rank, rand))) + (x, y.elementIds.map(u => randomFactor(rank, rand))) }) - }) + } for (iter <- 0 until iterations) { // perform ALS update From 3f98eff63a3df35f6dc56f0786c828cdbe4ffcf1 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 14 Aug 2013 17:34:34 -0700 Subject: [PATCH 051/136] Allow make-distribution.sh to specify Hadoop version used --- make-distribution.sh | 53 +++++++++++++++++++++++++++++++--------- project/SparkBuild.scala | 28 ++++++++++++++++----- 2 files changed, 64 insertions(+), 17 deletions(-) diff --git 
a/make-distribution.sh b/make-distribution.sh index 0a8941c1f8..a101024de5 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -24,9 +24,10 @@ # so it is completely self contained. # It does not contain source or *.class files. # -# Arguments -# (none): Creates dist/ directory -# tgz: Additionally creates spark-$VERSION-bin.tar.gz +# Optional Arguments +# --tgz: Additionally creates spark-$VERSION-bin.tar.gz +# --hadoop VERSION: Builds against specified version of Hadoop. +# --with-yarn: Enables support for Hadoop YARN. # # Recommended deploy/testing procedure (standalone mode): # 1) Rsync / deploy the dist/ dir to one host @@ -44,20 +45,50 @@ DISTDIR="$FWDIR/dist" export TERM=dumb # Prevents color codes in SBT output VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/') -if [ "$1" == "tgz" ]; then - echo "Making spark-$VERSION-bin.tar.gz" +# Initialize defaults +SPARK_HADOOP_VERSION=1.2.1 +SPARK_YARN_MODE=false +MAKE_TGZ=false + +# Parse arguments +while (( "$#" )); do + case $1 in + --hadoop) + SPARK_HADOOP_VERSION="$2" + shift + ;; + --with-yarn) + SPARK_YARN_MODE=true + ;; + --tgz) + MAKE_TGZ=true + ;; + esac + shift +done + +if [ "$MAKE_TGZ" == "true" ]; then + echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" else echo "Making distribution for Spark $VERSION in $DISTDIR..." fi +echo "Hadoop version set to $SPARK_HADOOP_VERSION" +if [ "$SPARK_YARN_MODE" == "true" ]; then + echo "YARN enabled" +else + echo "YARN disabled" +fi # Build fat JAR -$FWDIR/sbt/sbt "repl/assembly" +export SPARK_HADOOP_VERSION +export SPARK_YARN_MODE +"$FWDIR/sbt/sbt" "repl/assembly" # Make directories rm -rf "$DISTDIR" mkdir -p "$DISTDIR/jars" -echo "$VERSION" >$DISTDIR/RELEASE +echo "$VERSION" > "$DISTDIR/RELEASE" # Copy jars cp $FWDIR/repl/target/*.jar "$DISTDIR/jars/" @@ -69,9 +100,9 @@ cp "$FWDIR/run" "$FWDIR/spark-shell" "$DISTDIR" cp "$FWDIR/spark-executor" "$DISTDIR" -if [ "$1" == "tgz" ]; then +if [ "$MAKE_TGZ" == "true" ]; then TARDIR="$FWDIR/spark-$VERSION" - cp -r $DISTDIR $TARDIR - tar -zcf spark-$VERSION-bin.tar.gz -C $FWDIR spark-$VERSION - rm -rf $TARDIR + cp -r "$DISTDIR" "$TARDIR" + tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION" + rm -rf "$TARDIR" fi diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 350a36a964..23c7179919 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -24,10 +24,15 @@ import AssemblyKeys._ //import com.jsuereth.pgp.sbtplugin.PgpKeys._ object SparkBuild extends Build { + // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or + // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. + val HADOOP_VERSION = "1.2.1" + val HADOOP_YARN = false + // HBase version; set as appropriate. 
val HBASE_VERSION = "0.94.6" - lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib, tools, yarn) + lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects:_*) lazy val core = Project("core", file("core"), settings = coreSettings) @@ -49,6 +54,17 @@ object SparkBuild extends Build { lazy val MavenCompile = config("m2r") extend(Compile) lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") + // Allows build configuration to be set through environment variables + lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", HADOOP_VERSION) + lazy val isYarnMode = scala.util.Properties.envOrNone("SPARK_YARN_MODE") match { + case None => HADOOP_YARN + case Some(v) => v.toBoolean + } + + // Conditionally include the yarn sub-project + lazy val maybeYarn = if(isYarnMode) Seq[ProjectReference](yarn) else Seq[ProjectReference]() + lazy val allProjects = Seq[ProjectReference](core, repl, examples, bagel, streaming, mllib, tools) ++ maybeYarn + def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.spark-project", version := "0.8.0-SNAPSHOT", @@ -170,7 +186,7 @@ object SparkBuild extends Build { "org.apache.mesos" % "mesos" % "0.12.1", "io.netty" % "netty-all" % "4.0.0.Beta2", "org.apache.derby" % "derby" % "10.4.2.0" % "test", - "org.apache.hadoop" % "hadoop-client" % "1.2.1" excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), "com.codahale.metrics" % "metrics-core" % "3.0.0", "com.codahale.metrics" % "metrics-jvm" % "3.0.0", "com.codahale.metrics" % "metrics-json" % "3.0.0", @@ -239,10 +255,10 @@ object SparkBuild extends Build { name := "spark-yarn", libraryDependencies ++= Seq( // Exclude rule required for all ? 
- "org.apache.hadoop" % "hadoop-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-yarn-api" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-yarn-common" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-yarn-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm) + "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm) ) ) ++ assemblySettings ++ extraAssemblySettings From 8add2d7a59c59e72539da86a58b9c2980843f1e0 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 14 Aug 2013 17:49:42 -0700 Subject: [PATCH 052/136] Fix repl/assembly when YARN enabled --- project/SparkBuild.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 23c7179919..fa9ec7deca 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -36,7 +36,7 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) - lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core) dependsOn(bagel) dependsOn(mllib) + lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn(core) dependsOn(bagel) dependsOn(mllib) dependsOn(maybeYarn:_*) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) dependsOn (core) dependsOn (streaming) dependsOn(mllib) @@ -62,8 +62,9 @@ object SparkBuild extends Build { } // Conditionally include the yarn sub-project - lazy val maybeYarn = if(isYarnMode) Seq[ProjectReference](yarn) else Seq[ProjectReference]() - lazy val allProjects = Seq[ProjectReference](core, repl, examples, bagel, streaming, mllib, tools) ++ maybeYarn + lazy val maybeYarn = if(isYarnMode) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]() + lazy val maybeYarnRef = if(isYarnMode) Seq[ProjectReference](yarn) else Seq[ProjectReference]() + lazy val allProjects = Seq[ProjectReference](core, repl, examples, bagel, streaming, mllib, tools) ++ maybeYarnRef def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.spark-project", From 353fab2440dbf1369df20393e0377de2b327de72 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 15 Aug 2013 12:10:31 -0700 Subject: [PATCH 053/136] Initial changes to make Maven build agnostic of hadoop version --- assembly/pom.xml | 8 +-- bagel/pom.xml | 43 ++++---------- core/pom.xml | 63 ++------------------ examples/pom.xml | 144 +++++++++++++++++++--------------------------- mllib/pom.xml | 43 ++++---------- pom.xml | 32 ++++------- repl-bin/pom.xml | 76 ++++++++++-------------- repl/pom.xml | 72 +++++++---------------- streaming/pom.xml | 42 ++++---------- tools/pom.xml | 53 +++++------------ 10 files changed, 175 insertions(+), 401 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index cc5a4875af..76ac9f5478 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -38,12 +38,6 @@ - - hadoop1 - - hadoop1 - - hadoop2 @@ -89,4 +83,4 @@ ${project.version} - \ No newline at end 
of file + diff --git a/bagel/pom.xml b/bagel/pom.xml index 60bbc49e6c..3c82af3b33 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -32,11 +32,20 @@ http://spark-project.org/ + + org.spark-project + spark-core + ${project.version} + + + org.apache.hadoop + hadoop-client + provided + org.eclipse.jetty jetty-server - org.scalatest scalatest_${scala.version} @@ -60,33 +69,6 @@ - - hadoop1 - - - org.spark-project - spark-core - ${project.version} - hadoop1 - - - org.apache.hadoop - hadoop-core - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop1 - - - - - hadoop2 @@ -96,11 +78,6 @@ ${project.version} hadoop2 - - org.apache.hadoop - hadoop-core - provided - org.apache.hadoop hadoop-client diff --git a/core/pom.xml b/core/pom.xml index dfadd22d42..680ae94a11 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -32,6 +32,11 @@ http://spark-project.org/ + + org.apache.hadoop + hadoop-client + provided + org.eclipse.jetty jetty-server @@ -130,7 +135,6 @@ com.codahale.metrics metrics-json - org.apache.derby derby @@ -210,66 +214,9 @@ - - hadoop1 - - - org.apache.hadoop - hadoop-core - provided - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-source - generate-sources - - add-source - - - - src/main/scala - src/hadoop1/scala - - - - - add-scala-test-sources - generate-test-sources - - add-test-source - - - - src/test/scala - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop1 - - - - - hadoop2 - - org.apache.hadoop - hadoop-core - provided - org.apache.hadoop hadoop-client diff --git a/examples/pom.xml b/examples/pom.xml index a051da8a77..6a9c19ed6f 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -32,6 +32,31 @@ http://spark-project.org/ + + org.spark-project + spark-core + ${project.version} + + + org.spark-project + spark-streaming + ${project.version} + + + org.spark-project + spark-mllib + ${project.version} + + + org.apache.hadoop + hadoop-client + provided + + + org.apache.hbase + hbase + 0.94.6 + org.scala-lang scala-library @@ -55,41 +80,41 @@ scalacheck_${scala.version} test - - org.apache.cassandra - cassandra-all - 1.2.5 - - - com.google.guava - guava - - - com.googlecode.concurrentlinkedhashmap - concurrentlinkedhashmap-lru - - - com.ning - compress-lzf - - - io.netty - netty - - - jline - jline - - - log4j - log4j - - - org.apache.cassandra.deps - avro - - - + + org.apache.cassandra + cassandra-all + 1.2.5 + + + com.google.guava + guava + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.ning + compress-lzf + + + io.netty + netty + + + jline + jline + + + log4j + log4j + + + org.apache.cassandra.deps + avro + + + target/scala-${scala.version}/classes @@ -103,50 +128,6 @@ - - hadoop1 - - - org.spark-project - spark-core - ${project.version} - hadoop1 - - - org.spark-project - spark-streaming - ${project.version} - hadoop1 - - - org.spark-project - spark-mllib - ${project.version} - hadoop1 - - - org.apache.hadoop - hadoop-core - provided - - - org.apache.hbase - hbase - 0.94.6 - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop1 - - - - - hadoop2 @@ -168,11 +149,6 @@ ${project.version} hadoop2 - - org.apache.hadoop - hadoop-core - provided - org.apache.hadoop hadoop-client diff --git a/mllib/pom.xml b/mllib/pom.xml index a07480fbe2..36f410d3b0 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -32,6 +32,16 @@ http://spark-project.org/ + + org.spark-project + spark-core + ${project.version} + + + org.apache.hadoop + hadoop-client + provided + org.eclipse.jetty jetty-server @@ 
-41,7 +51,6 @@ jblas 1.2.3 - org.scalatest scalatest_${scala.version} @@ -70,33 +79,6 @@ - - hadoop1 - - - org.spark-project - spark-core - ${project.version} - hadoop1 - - - org.apache.hadoop - hadoop-core - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop1 - - - - - hadoop2 @@ -106,11 +88,6 @@ ${project.version} hadoop2 - - org.apache.hadoop - hadoop-core - provided - org.apache.hadoop hadoop-client diff --git a/pom.xml b/pom.xml index 135b18bd26..8d34fff3ee 100644 --- a/pom.xml +++ b/pom.xml @@ -325,6 +325,17 @@ 0.8 test + + org.apache.hadoop + hadoop-client + 1.2.1 + + + asm + asm + + + @@ -530,22 +541,6 @@ - - hadoop1 - - 1 - - - - - org.apache.hadoop - hadoop-core - 1.2.1 - - - - - hadoop2 @@ -553,11 +548,6 @@ - - org.apache.hadoop - hadoop-core - 2.0.0-mr1-cdh${cdh.version} - org.apache.hadoop hadoop-client diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index 7c4e722cc1..81aba06e14 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -37,6 +37,37 @@ root + + + org.spark-project + spark-core + ${project.version} + + + org.spark-project + spark-bagel + ${project.version} + runtime + + + org.spark-project + spark-examples + ${project.version} + runtime + + + org.spark-project + spark-repl + ${project.version} + runtime + + + org.apache.hadoop + hadoop-client + runtime + + + @@ -85,46 +116,6 @@ - - hadoop1 - - hadoop1 - - - - org.spark-project - spark-core - ${project.version} - hadoop1 - - - org.spark-project - spark-bagel - ${project.version} - hadoop1 - runtime - - - org.spark-project - spark-examples - ${project.version} - hadoop1 - runtime - - - org.spark-project - spark-repl - ${project.version} - hadoop1 - runtime - - - org.apache.hadoop - hadoop-core - runtime - - - hadoop2 @@ -158,11 +149,6 @@ hadoop2 runtime - - org.apache.hadoop - hadoop-core - runtime - org.apache.hadoop hadoop-client diff --git a/repl/pom.xml b/repl/pom.xml index 862595b9f9..81cebe178a 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -37,6 +37,28 @@ + + org.spark-project + spark-core + ${project.version} + + + org.spark-project + spark-bagel + ${project.version} + runtime + + + org.spark-project + spark-examples + ${project.version} + runtime + + + org.apache.hadoop + hadoop-client + provided + org.eclipse.jetty jetty-server @@ -57,7 +79,6 @@ org.slf4j slf4j-log4j12 - org.scalatest scalatest_${scala.version} @@ -117,50 +138,6 @@ - - hadoop1 - - hadoop1 - - - - org.spark-project - spark-core - ${project.version} - hadoop1 - - - org.spark-project - spark-bagel - ${project.version} - hadoop1 - runtime - - - org.spark-project - spark-examples - ${project.version} - hadoop1 - runtime - - - org.apache.hadoop - hadoop-core - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop1 - - - - - hadoop2 @@ -187,11 +164,6 @@ hadoop2 runtime - - org.apache.hadoop - hadoop-core - provided - org.apache.hadoop hadoop-client diff --git a/streaming/pom.xml b/streaming/pom.xml index 7e6b06d772..9b478f7a05 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -40,6 +40,16 @@ + + org.spark-project + spark-core + ${project.version} + + + org.apache.hadoop + hadoop-client + provided + org.eclipse.jetty jetty-server @@ -117,33 +127,6 @@ - - hadoop1 - - - org.spark-project - spark-core - ${project.version} - hadoop1 - - - org.apache.hadoop - hadoop-core - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop1 - - - - - hadoop2 @@ -153,11 +136,6 @@ ${project.version} hadoop2 - - org.apache.hadoop - hadoop-core - provided - org.apache.hadoop hadoop-client 
diff --git a/tools/pom.xml b/tools/pom.xml index 878eb82f18..c123c2ab23 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -31,6 +31,21 @@ http://spark-project.org/ + + org.spark-project + spark-core + ${project.version} + + + org.spark-project + spark-streaming + ${project.version} + + + org.apache.hadoop + hadoop-client + provided + org.scalatest scalatest_${scala.version} @@ -58,39 +73,6 @@ - - hadoop1 - - - org.spark-project - spark-core - ${project.version} - hadoop1 - - - org.spark-project - spark-streaming - ${project.version} - hadoop1 - - - org.apache.hadoop - hadoop-core - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop1 - - - - - hadoop2 @@ -106,11 +88,6 @@ ${project.version} hadoop2 - - org.apache.hadoop - hadoop-core - provided - org.apache.hadoop hadoop-client From 11b42a84db255eb659412e9d0bf4622cb2e8b20a Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 15 Aug 2013 15:31:31 -0700 Subject: [PATCH 054/136] Maven build now works with CDH hadoop-2.0.0-mr1 --- assembly/pom.xml | 14 ----------- bagel/pom.xml | 27 -------------------- core/pom.xml | 52 -------------------------------------- examples/pom.xml | 44 -------------------------------- mllib/pom.xml | 27 -------------------- pom.xml | 55 +++++++++++++++------------------------- repl-bin/pom.xml | 40 ----------------------------- repl/pom.xml | 64 ++++++++--------------------------------------- streaming/pom.xml | 27 -------------------- tools/pom.xml | 33 ------------------------ 10 files changed, 30 insertions(+), 353 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 76ac9f5478..3d645e0379 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -37,20 +37,6 @@ - - - hadoop2 - - hadoop2 - - - - hadoop2-yarn - - hadoop2-yarn - - - org.spark-project diff --git a/bagel/pom.xml b/bagel/pom.xml index 3c82af3b33..1b555bead7 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -69,33 +69,6 @@ - - hadoop2 - - - org.spark-project - spark-core - ${project.version} - hadoop2 - - - org.apache.hadoop - hadoop-client - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2 - - - - - hadoop2-yarn diff --git a/core/pom.xml b/core/pom.xml index 680ae94a11..9310d000fd 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -214,58 +214,6 @@ - - hadoop2 - - - org.apache.hadoop - hadoop-client - provided - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-source - generate-sources - - add-source - - - - src/main/scala - src/hadoop2/scala - - - - - add-scala-test-sources - generate-test-sources - - add-test-source - - - - src/test/scala - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2 - - - - - hadoop2-yarn diff --git a/examples/pom.xml b/examples/pom.xml index 6a9c19ed6f..6e54e94cf5 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -128,50 +128,6 @@ - - hadoop2 - - - org.spark-project - spark-core - ${project.version} - hadoop2 - - - org.spark-project - spark-streaming - ${project.version} - hadoop2 - - - org.spark-project - spark-mllib - ${project.version} - hadoop2 - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hbase - hbase - 0.94.6 - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2 - - - - - hadoop2-yarn diff --git a/mllib/pom.xml b/mllib/pom.xml index 36f410d3b0..863aef9392 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -79,33 +79,6 @@ - - hadoop2 - - - org.spark-project - spark-core - ${project.version} - hadoop2 - - - org.apache.hadoop - hadoop-client - provided - - - - - - 
org.apache.maven.plugins - maven-jar-plugin - - hadoop2 - - - - - hadoop2-yarn diff --git a/pom.xml b/pom.xml index 8d34fff3ee..054f5d170c 100644 --- a/pom.xml +++ b/pom.xml @@ -73,8 +73,9 @@ 0.12.1 2.0.3 1.7.2 - 4.1.2 1.2.17 + 1.2.1 + 64m 512m @@ -328,7 +329,7 @@ org.apache.hadoop hadoop-client - 1.2.1 + ${hadoop.version} asm @@ -336,6 +337,23 @@ + + + org.apache.avro + avro + 1.7.4 + + + org.apache.avro + avro-ipc + 1.7.4 + + + org.jboss.netty + netty + + + @@ -541,39 +559,6 @@ - - hadoop2 - - 2 - - - - - org.apache.hadoop - hadoop-client - 2.0.0-mr1-cdh${cdh.version} - - - - org.apache.avro - avro - 1.7.4 - - - org.apache.avro - avro-ipc - 1.7.4 - - - org.jboss.netty - netty - - - - - - - hadoop2-yarn diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index 81aba06e14..eaee8ea016 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -116,46 +116,6 @@ - - hadoop2 - - hadoop2 - - - - org.spark-project - spark-core - ${project.version} - hadoop2 - - - org.spark-project - spark-bagel - ${project.version} - hadoop2 - runtime - - - org.spark-project - spark-examples - ${project.version} - hadoop2 - runtime - - - org.spark-project - spark-repl - ${project.version} - hadoop2 - runtime - - - org.apache.hadoop - hadoop-client - runtime - - - hadoop2-yarn diff --git a/repl/pom.xml b/repl/pom.xml index 81cebe178a..032c20e118 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -59,6 +59,16 @@ hadoop-client provided + + org.apache.avro + avro + provided + + + org.apache.avro + avro-ipc + provided + org.eclipse.jetty jetty-server @@ -138,60 +148,6 @@ - - hadoop2 - - hadoop2 - - - - org.spark-project - spark-core - ${project.version} - hadoop2 - - - org.spark-project - spark-bagel - ${project.version} - hadoop2 - runtime - - - org.spark-project - spark-examples - ${project.version} - hadoop2 - runtime - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.avro - avro - provided - - - org.apache.avro - avro-ipc - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2 - - - - - hadoop2-yarn diff --git a/streaming/pom.xml b/streaming/pom.xml index 9b478f7a05..612ff0a024 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -127,33 +127,6 @@ - - hadoop2 - - - org.spark-project - spark-core - ${project.version} - hadoop2 - - - org.apache.hadoop - hadoop-client - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2 - - - - - hadoop2-yarn diff --git a/tools/pom.xml b/tools/pom.xml index c123c2ab23..5864c9f217 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -73,39 +73,6 @@ - - hadoop2 - - - org.spark-project - spark-core - ${project.version} - hadoop2 - - - org.spark-project - spark-streaming - ${project.version} - hadoop2 - - - org.apache.hadoop - hadoop-client - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2 - - - - - hadoop2-yarn From 9dd15fe700ad8f52739cce58cbdf198fab8fd5d8 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 15 Aug 2013 15:40:37 -0700 Subject: [PATCH 055/136] Don't mark hadoop-client as 'provided' --- bagel/pom.xml | 1 - core/pom.xml | 1 - examples/pom.xml | 1 - mllib/pom.xml | 1 - 4 files changed, 4 deletions(-) diff --git a/bagel/pom.xml b/bagel/pom.xml index 1b555bead7..fc5dce7ffd 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -40,7 +40,6 @@ org.apache.hadoop hadoop-client - provided org.eclipse.jetty diff --git a/core/pom.xml b/core/pom.xml index 9310d000fd..c5baecfaad 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -35,7 +35,6 @@ org.apache.hadoop hadoop-client - provided org.eclipse.jetty 
diff --git a/examples/pom.xml b/examples/pom.xml index 6e54e94cf5..4ccc6aa198 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -50,7 +50,6 @@ org.apache.hadoop hadoop-client - provided org.apache.hbase diff --git a/mllib/pom.xml b/mllib/pom.xml index 863aef9392..801aa6e719 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -40,7 +40,6 @@ org.apache.hadoop hadoop-client - provided org.eclipse.jetty From 741ecd56fe714fe42c22518aefcfa48fa3a448c7 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 15 Aug 2013 16:32:15 -0700 Subject: [PATCH 056/136] Forgot to remove a few references to ${classifier} --- assembly/pom.xml | 5 ----- repl-bin/pom.xml | 8 ++++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 3d645e0379..ca20ccadba 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -41,31 +41,26 @@ org.spark-project spark-core - ${classifier.name} ${project.version} org.spark-project spark-bagel - ${classifier.name} ${project.version} org.spark-project spark-mllib - ${classifier.name} ${project.version} org.spark-project spark-repl - ${classifier.name} ${project.version} org.spark-project spark-streaming - ${classifier.name} ${project.version} diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index eaee8ea016..270a160120 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -32,8 +32,8 @@ http://spark-project.org/ - spark-${classifier} - /usr/share/spark-${classifier} + spark + /usr/share/spark root @@ -75,7 +75,7 @@ maven-shade-plugin false - ${project.build.directory}/${project.artifactId}-${project.version}-shaded-${classifier}.jar + ${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar *:* @@ -207,7 +207,7 @@ gzip - ${project.build.directory}/${project.artifactId}-${project.version}-shaded-${classifier}.jar + ${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar file perm From ad580b94d506b3dbb8b4206326e4df2e1104e3b3 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 15 Aug 2013 16:49:24 -0700 Subject: [PATCH 057/136] Maven build now also works with YARN --- bagel/pom.xml | 40 ------------ bin/compute-classpath.sh | 2 +- core/pom.xml | 70 --------------------- examples/pom.xml | 57 ----------------- mllib/pom.xml | 40 ------------ pom.xml | 128 +++++++++++++++++++++++++++++++++++++++ repl-bin/pom.xml | 47 +------------- repl/pom.xml | 64 +------------------- streaming/pom.xml | 40 ------------ tools/pom.xml | 46 -------------- yarn/pom.xml | 106 ++++++++++++++++++++++++++++++++ 11 files changed, 237 insertions(+), 403 deletions(-) create mode 100644 yarn/pom.xml diff --git a/bagel/pom.xml b/bagel/pom.xml index fc5dce7ffd..ae40f38e43 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -66,44 +66,4 @@ - - - - hadoop2-yarn - - - org.spark-project - spark-core - ${project.version} - hadoop2-yarn - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hadoop - hadoop-yarn-api - provided - - - org.apache.hadoop - hadoop-yarn-common - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2-yarn - - - - - - diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index db6fc866ab..f975d3bfb9 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -63,7 +63,7 @@ function dev_classpath { CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*" # Add the shaded JAR for Maven builds if [ -e $REPL_BIN_DIR/target ]; then - for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do + for jar in `find "$REPL_BIN_DIR/target" 
-name 'spark-repl-*-shaded.jar'`; do CLASSPATH="$CLASSPATH:$jar" done # The shaded JAR doesn't contain examples, so include those separately diff --git a/core/pom.xml b/core/pom.xml index c5baecfaad..90d279c635 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -211,74 +211,4 @@ - - - - hadoop2-yarn - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hadoop - hadoop-yarn-api - provided - - - org.apache.hadoop - hadoop-yarn-common - provided - - - org.apache.hadoop - hadoop-yarn-client - provided - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-source - generate-sources - - add-source - - - - src/main/scala - src/hadoop2-yarn/scala - - - - - add-scala-test-sources - generate-test-sources - - add-test-source - - - - src/test/scala - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2-yarn - - - - - - diff --git a/examples/pom.xml b/examples/pom.xml index 4ccc6aa198..4eb32935f4 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -125,61 +125,4 @@ - - - - hadoop2-yarn - - - org.spark-project - spark-core - ${project.version} - hadoop2-yarn - - - org.spark-project - spark-streaming - ${project.version} - hadoop2-yarn - - - org.spark-project - spark-mllib - ${project.version} - hadoop2-yarn - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hadoop - hadoop-yarn-api - provided - - - org.apache.hadoop - hadoop-yarn-common - provided - - - org.apache.hbase - hbase - 0.94.6 - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2-yarn - - - - - - diff --git a/mllib/pom.xml b/mllib/pom.xml index 801aa6e719..3292f6dad0 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -76,44 +76,4 @@ - - - - hadoop2-yarn - - - org.spark-project - spark-core - ${project.version} - hadoop2-yarn - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hadoop - hadoop-yarn-api - provided - - - org.apache.hadoop - hadoop-yarn-common - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2-yarn - - - - - - diff --git a/pom.xml b/pom.xml index 054f5d170c..4714576f3b 100644 --- a/pom.xml +++ b/pom.xml @@ -335,6 +335,26 @@ asm asm + + org.jboss.netty + netty + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-jaxrs + + + org.codehaus.jackson + jackson-xc + @@ -568,6 +588,10 @@ 2.0.5-alpha + + yarn + + maven-root @@ -589,21 +613,125 @@ org.apache.hadoop hadoop-client ${yarn.version} + + + asm + asm + + + org.jboss.netty + netty + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-jaxrs + + + org.codehaus.jackson + jackson-xc + + org.apache.hadoop hadoop-yarn-api ${yarn.version} + + + asm + asm + + + org.jboss.netty + netty + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-jaxrs + + + org.codehaus.jackson + jackson-xc + + org.apache.hadoop hadoop-yarn-common ${yarn.version} + + + asm + asm + + + org.jboss.netty + netty + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-jaxrs + + + org.codehaus.jackson + jackson-xc + + org.apache.hadoop hadoop-yarn-client ${yarn.version} + + + asm + asm + + + org.jboss.netty + netty + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-jaxrs + + + org.codehaus.jackson + jackson-xc + + diff --git 
a/repl-bin/pom.xml b/repl-bin/pom.xml index 270a160120..f3bde60744 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -118,56 +118,11 @@ hadoop2-yarn - - hadoop2-yarn - org.spark-project - spark-core + spark-yarn ${project.version} - hadoop2-yarn - - - org.spark-project - spark-bagel - ${project.version} - hadoop2-yarn - runtime - - - org.spark-project - spark-examples - ${project.version} - hadoop2-yarn - runtime - - - org.spark-project - spark-repl - ${project.version} - hadoop2-yarn - runtime - - - org.apache.hadoop - hadoop-client - runtime - - - org.apache.hadoop - hadoop-yarn-api - runtime - - - org.apache.hadoop - hadoop-yarn-common - runtime - - - org.apache.hadoop - hadoop-yarn-client - runtime diff --git a/repl/pom.xml b/repl/pom.xml index 032c20e118..429de7861f 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -146,78 +146,16 @@ - hadoop2-yarn - - hadoop2-yarn - org.spark-project - spark-core + spark-yarn ${project.version} - hadoop2-yarn - - - org.spark-project - spark-bagel - ${project.version} - hadoop2-yarn - runtime - - - org.spark-project - spark-examples - ${project.version} - hadoop2-yarn - runtime - - - org.spark-project - spark-streaming - ${project.version} - hadoop2-yarn - runtime - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hadoop - hadoop-yarn-api - provided - - - org.apache.hadoop - hadoop-yarn-common - provided - - - org.apache.avro - avro - provided - - - org.apache.avro - avro-ipc - provided - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2-yarn - - - - diff --git a/streaming/pom.xml b/streaming/pom.xml index 612ff0a024..1860990122 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -125,44 +125,4 @@ - - - - hadoop2-yarn - - - org.spark-project - spark-core - ${project.version} - hadoop2-yarn - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hadoop - hadoop-yarn-api - provided - - - org.apache.hadoop - hadoop-yarn-common - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2-yarn - - - - - - diff --git a/tools/pom.xml b/tools/pom.xml index 5864c9f217..9177d85b2f 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -71,50 +71,4 @@ - - - - hadoop2-yarn - - - org.spark-project - spark-core - ${project.version} - hadoop2-yarn - - - org.spark-project - spark-streaming - ${project.version} - hadoop2-yarn - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hadoop - hadoop-yarn-api - provided - - - org.apache.hadoop - hadoop-yarn-common - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2-yarn - - - - - - diff --git a/yarn/pom.xml b/yarn/pom.xml new file mode 100644 index 0000000000..6acde8e98c --- /dev/null +++ b/yarn/pom.xml @@ -0,0 +1,106 @@ + + + + + 4.0.0 + + org.spark-project + spark-parent + 0.8.0-SNAPSHOT + ../pom.xml + + + org.spark-project + spark-yarn + jar + Spark Project YARN Support + http://spark-project.org/ + + + + org.spark-project + spark-core + ${project.version} + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.avro + avro + + + org.apache.avro + avro-ipc + + + + + target/scala-${scala.version}/classes + target/scala-${scala.version}/test-classes + + + org.apache.maven.plugins + maven-shade-plugin + + false + ${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar + + + *:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + package + + shade + + + + + + reference.conf + + + + 
+ + + + + From c1e547bb7f21ebed198bea0aed0a122eb0d70835 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Fri, 16 Aug 2013 12:26:45 -0700 Subject: [PATCH 058/136] Updates to repl and example POMs to match SBT build --- examples/pom.xml | 10 ++++++++++ repl-bin/pom.xml | 6 ------ repl/pom.xml | 6 ------ 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 4eb32935f4..023ad8cb45 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -55,6 +55,16 @@ org.apache.hbase hbase 0.94.6 + + + asm + asm + + + org.jboss.netty + netty + + org.scala-lang diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index f3bde60744..f132c44fb9 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -49,12 +49,6 @@ ${project.version} runtime - - org.spark-project - spark-examples - ${project.version} - runtime - org.spark-project spark-repl diff --git a/repl/pom.xml b/repl/pom.xml index 429de7861f..82e26defbc 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -48,12 +48,6 @@ ${project.version} runtime - - org.spark-project - spark-examples - ${project.version} - runtime - org.apache.hadoop hadoop-client From b1d99744a813a72301260612943f46853794f00f Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Fri, 16 Aug 2013 13:49:26 -0700 Subject: [PATCH 059/136] Fix SBT build under Hadoop 0.23.x --- project/SparkBuild.scala | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index fa9ec7deca..4023626c16 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -193,6 +193,17 @@ object SparkBuild extends Build { "com.codahale.metrics" % "metrics-json" % "3.0.0", "com.twitter" % "chill_2.9.3" % "0.3.1", "com.twitter" % "chill-java" % "0.3.1" + ) ++ ( + if (isYarnMode) { + // This kludge is needed for 0.23.x + Seq( + "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm) + ) + } else { + Seq() + } ) ) ++ assemblySettings ++ extraAssemblySettings From 67b593607c7df934d5a73012fe9cce220b25f321 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Fri, 16 Aug 2013 13:53:16 -0700 Subject: [PATCH 060/136] Rename YARN build flag to SPARK_WITH_YARN --- make-distribution.sh | 8 ++++---- project/SparkBuild.scala | 12 +++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/make-distribution.sh b/make-distribution.sh index a101024de5..55dc22b992 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -47,7 +47,7 @@ VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z # Initialize defaults SPARK_HADOOP_VERSION=1.2.1 -SPARK_YARN_MODE=false +SPARK_WITH_YARN=false MAKE_TGZ=false # Parse arguments @@ -58,7 +58,7 @@ while (( "$#" )); do shift ;; --with-yarn) - SPARK_YARN_MODE=true + SPARK_WITH_YARN=true ;; --tgz) MAKE_TGZ=true @@ -74,7 +74,7 @@ else fi echo "Hadoop version set to $SPARK_HADOOP_VERSION" -if [ "$SPARK_YARN_MODE" == "true" ]; then +if [ "$SPARK_WITH_YARN" == "true" ]; then echo "YARN enabled" else echo "YARN disabled" @@ -82,7 +82,7 @@ fi # Build fat JAR export SPARK_HADOOP_VERSION -export SPARK_YARN_MODE +export SPARK_WITH_YARN "$FWDIR/sbt/sbt" "repl/assembly" # Make directories diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4023626c16..cea982b886 100644 
--- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -26,8 +26,10 @@ import AssemblyKeys._ object SparkBuild extends Build { // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. - val HADOOP_VERSION = "1.2.1" - val HADOOP_YARN = false + // Note that these variables can be set through the environment variables + // SPARK_HADOOP_VERSION and SPARK_WITH_YARN. + val DEFAULT_HADOOP_VERSION = "1.2.1" + val DEFAULT_WITH_YARN = false // HBase version; set as appropriate. val HBASE_VERSION = "0.94.6" @@ -55,9 +57,9 @@ object SparkBuild extends Build { lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") // Allows build configuration to be set through environment variables - lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", HADOOP_VERSION) - lazy val isYarnMode = scala.util.Properties.envOrNone("SPARK_YARN_MODE") match { - case None => HADOOP_YARN + lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION) + lazy val isYarnMode = scala.util.Properties.envOrNone("SPARK_WITH_YARN") match { + case None => DEFAULT_WITH_YARN case Some(v) => v.toBoolean } From 44000b10ffed83ae605521701d104878f491f31c Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Sun, 18 Aug 2013 16:23:22 -0700 Subject: [PATCH 061/136] Make YARN POM file valid --- yarn/pom.xml | 61 ++++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/yarn/pom.xml b/yarn/pom.xml index 6acde8e98c..07dd170eae 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -30,34 +30,6 @@ Spark Project YARN Support http://spark-project.org/ - - - org.spark-project - spark-core - ${project.version} - - - org.apache.hadoop - hadoop-yarn-api - - - org.apache.hadoop - hadoop-yarn-common - - - org.apache.hadoop - hadoop-yarn-client - - - org.apache.avro - avro - - - org.apache.avro - avro-ipc - - - target/scala-${scala.version}/classes target/scala-${scala.version}/test-classes @@ -103,4 +75,37 @@ + + + + hadoop2-yarn + + + org.spark-project + spark-core + ${project.version} + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.avro + avro + + + org.apache.avro + avro-ipc + + + + From 47a7c4338a7e912d8677704204e98df15679322b Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Sun, 18 Aug 2013 16:58:08 -0700 Subject: [PATCH 062/136] Don't assume spark-examples JAR always exists --- bin/compute-classpath.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index f975d3bfb9..7a21b3c4a1 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -67,8 +67,9 @@ function dev_classpath { CLASSPATH="$CLASSPATH:$jar" done # The shaded JAR doesn't contain examples, so include those separately - EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar` - CLASSPATH+=":$EXAMPLES_JAR" + for jar in `find "$EXAMPLES_DIR/target" -name 'spark-examples*[0-9T].jar'`; do + CLASSPATH="$CLASSPATH:$jar" + done fi CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes" From bdd861c6c34ca3fa158707b3a1bed91ef928c1e3 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Sun, 18 Aug 2013 18:28:10 -0700 Subject: [PATCH 063/136] Fix Maven build with Hadoop 0.23.9 
--- core/pom.xml | 8 ++++++++ pom.xml | 11 ----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 90d279c635..2870906092 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -36,6 +36,14 @@ org.apache.hadoop hadoop-client + + org.apache.avro + avro + + + org.apache.avro + avro-ipc + org.eclipse.jetty jetty-server diff --git a/pom.xml b/pom.xml index 4714576f3b..e7445319dd 100644 --- a/pom.xml +++ b/pom.xml @@ -733,17 +733,6 @@ - - - org.apache.avro - avro - 1.7.4 - - - org.apache.avro - avro-ipc - 1.7.4 - From 23f4622affc25685bb74ce09ed56ef3515551d17 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Sun, 18 Aug 2013 18:53:57 -0700 Subject: [PATCH 064/136] Remove redundant dependencies from POMs --- bagel/pom.xml | 4 ---- examples/pom.xml | 4 ---- mllib/pom.xml | 4 ---- repl-bin/pom.xml | 5 ----- repl/pom.xml | 15 --------------- streaming/pom.xml | 5 ----- tools/pom.xml | 5 ----- 7 files changed, 42 deletions(-) diff --git a/bagel/pom.xml b/bagel/pom.xml index ae40f38e43..cbcf8d1239 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -37,10 +37,6 @@ spark-core ${project.version} - - org.apache.hadoop - hadoop-client - org.eclipse.jetty jetty-server diff --git a/examples/pom.xml b/examples/pom.xml index 023ad8cb45..0db52b8691 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -47,10 +47,6 @@ spark-mllib ${project.version} - - org.apache.hadoop - hadoop-client - org.apache.hbase hbase diff --git a/mllib/pom.xml b/mllib/pom.xml index 3292f6dad0..ab31d5734e 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -37,10 +37,6 @@ spark-core ${project.version} - - org.apache.hadoop - hadoop-client - org.eclipse.jetty jetty-server diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index f132c44fb9..919e35f240 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -55,11 +55,6 @@ ${project.version} runtime - - org.apache.hadoop - hadoop-client - runtime - diff --git a/repl/pom.xml b/repl/pom.xml index 82e26defbc..5bc9a99c5c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -48,21 +48,6 @@ ${project.version} runtime - - org.apache.hadoop - hadoop-client - provided - - - org.apache.avro - avro - provided - - - org.apache.avro - avro-ipc - provided - org.eclipse.jetty jetty-server diff --git a/streaming/pom.xml b/streaming/pom.xml index 1860990122..5c0582d6fb 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -45,11 +45,6 @@ spark-core ${project.version} - - org.apache.hadoop - hadoop-client - provided - org.eclipse.jetty jetty-server diff --git a/tools/pom.xml b/tools/pom.xml index 9177d85b2f..95b5e80e5b 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -41,11 +41,6 @@ spark-streaming ${project.version} - - org.apache.hadoop - hadoop-client - provided - org.scalatest scalatest_${scala.version} From 90a04dab8d9a2a9a372cea7cdf46cc0fd0f2f76c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 28 Jul 2013 22:02:01 -0400 Subject: [PATCH 065/136] Initial work towards scheduler refactoring: - Replace use of hostPort vs host in Task.preferredLocations with a TaskLocation class that contains either an executorId and a host or just a host. This is part of a bigger effort to eliminate hostPort based data structures and just use executorID, since the hostPort vs host stuff is confusing (and not checkable with static typing, leading to ugly debug code), and hostPorts are not provided by Mesos. - Replaced most hostPort-based data structures and fields as above. 
- Simplified ClusterTaskSetManager to deal with preferred locations in a more concise way and generally be more concise. - Updated the way ClusterTaskSetManager handles racks: instead of enqueueing a task to a separate queue for all the hosts in the rack, which would create lots of large queues, have one queue per rack name. - Removed non-local fallback stuff in ClusterScheduler that tried to launch less-local tasks on a node once the local ones were all assigned. This change didn't work because many cluster schedulers send offers for just one node at a time (even the standalone and YARN ones do so as nodes join the cluster one by one). Thus, lots of non-local tasks would be assigned even though a node with locality for them would be able to receive tasks just a short time later. - Renamed MapOutputTracker "generations" to "epochs". --- .../cluster/YarnClusterScheduler.scala | 7 - .../main/scala/spark/MapOutputTracker.scala | 62 +- core/src/main/scala/spark/RDD.scala | 4 +- .../main/scala/spark/executor/Executor.scala | 12 +- core/src/main/scala/spark/rdd/BlockRDD.scala | 7 +- .../scala/spark/rdd/ZippedPartitionsRDD.scala | 2 + .../scala/spark/scheduler/DAGScheduler.scala | 79 +-- .../spark/scheduler/DAGSchedulerEvent.scala | 4 +- .../scala/spark/scheduler/ResultTask.scala | 15 +- .../spark/scheduler/ShuffleMapTask.scala | 15 +- .../src/main/scala/spark/scheduler/Task.scala | 4 +- .../scala/spark/scheduler/TaskLocation.scala | 32 ++ .../scheduler/TaskSchedulerListener.scala | 2 +- .../scheduler/cluster/ClusterScheduler.scala | 295 +++------- .../cluster/ClusterTaskSetManager.scala | 528 ++++++------------ .../cluster/StandaloneSchedulerBackend.scala | 15 +- .../spark/scheduler/cluster/TaskInfo.scala | 4 +- .../scheduler/cluster/TaskLocality.scala | 32 ++ .../scheduler/cluster/TaskSetManager.scala | 13 +- .../spark/scheduler/cluster/WorkerOffer.scala | 3 +- .../scheduler/local/LocalScheduler.scala | 3 +- .../scheduler/local/LocalTaskSetManager.scala | 21 +- .../scala/spark/storage/BlockManager.scala | 64 +-- .../main/scala/spark/ui/jobs/StagePage.scala | 2 +- .../scala/spark/MapOutputTrackerSuite.scala | 12 +- .../scheduler/ClusterSchedulerSuite.scala | 6 +- .../spark/scheduler/DAGSchedulerSuite.scala | 26 +- 27 files changed, 501 insertions(+), 768 deletions(-) create mode 100644 core/src/main/scala/spark/scheduler/TaskLocation.scala create mode 100644 core/src/main/scala/spark/scheduler/cluster/TaskLocality.scala diff --git a/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala b/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala index 307d96111c..bb58353e0c 100644 --- a/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala +++ b/core/src/hadoop2-yarn/scala/spark/scheduler/cluster/YarnClusterScheduler.scala @@ -41,13 +41,6 @@ private[spark] class YarnClusterScheduler(sc: SparkContext, conf: Configuration) if (retval != null) Some(retval) else None } - // By default, if rack is unknown, return nothing - override def getCachedHostsForRack(rack: String): Option[Set[String]] = { - if (rack == None || rack == null) return None - - YarnAllocationHandler.fetchCachedHostsForRack(rack) - } - override def postStartHook() { val sparkContextInitialized = ApplicationMaster.sparkContextInitialized(sc) if (sparkContextInitialized){ diff --git a/core/src/main/scala/spark/MapOutputTracker.scala b/core/src/main/scala/spark/MapOutputTracker.scala index 2c417e31db..0cd0341a72 100644 --- 
a/core/src/main/scala/spark/MapOutputTracker.scala +++ b/core/src/main/scala/spark/MapOutputTracker.scala @@ -64,11 +64,11 @@ private[spark] class MapOutputTracker extends Logging { // Incremented every time a fetch fails so that client nodes know to clear // their cache of map output locations if this happens. - private var generation: Long = 0 - private val generationLock = new java.lang.Object + private var epoch: Long = 0 + private val epochLock = new java.lang.Object // Cache a serialized version of the output statuses for each shuffle to send them out faster - var cacheGeneration = generation + var cacheEpoch = epoch private val cachedSerializedStatuses = new TimeStampedHashMap[Int, Array[Byte]] val metadataCleaner = new MetadataCleaner("MapOutputTracker", this.cleanup) @@ -108,10 +108,10 @@ private[spark] class MapOutputTracker extends Logging { def registerMapOutputs( shuffleId: Int, statuses: Array[MapStatus], - changeGeneration: Boolean = false) { + changeEpoch: Boolean = false) { mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses) - if (changeGeneration) { - incrementGeneration() + if (changeEpoch) { + incrementEpoch() } } @@ -124,7 +124,7 @@ private[spark] class MapOutputTracker extends Logging { array(mapId) = null } } - incrementGeneration() + incrementEpoch() } else { throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID") } @@ -206,58 +206,58 @@ private[spark] class MapOutputTracker extends Logging { trackerActor = null } - // Called on master to increment the generation number - def incrementGeneration() { - generationLock.synchronized { - generation += 1 - logDebug("Increasing generation to " + generation) + // Called on master to increment the epoch number + def incrementEpoch() { + epochLock.synchronized { + epoch += 1 + logDebug("Increasing epoch to " + epoch) } } - // Called on master or workers to get current generation number - def getGeneration: Long = { - generationLock.synchronized { - return generation + // Called on master or workers to get current epoch number + def getEpoch: Long = { + epochLock.synchronized { + return epoch } } - // Called on workers to update the generation number, potentially clearing old outputs - // because of a fetch failure. (Each Mesos task calls this with the latest generation + // Called on workers to update the epoch number, potentially clearing old outputs + // because of a fetch failure. (Each worker task calls this with the latest epoch // number on the master at the time it was created.) 
- def updateGeneration(newGen: Long) { - generationLock.synchronized { - if (newGen > generation) { - logInfo("Updating generation to " + newGen + " and clearing cache") + def updateEpoch(newEpoch: Long) { + epochLock.synchronized { + if (newEpoch > epoch) { + logInfo("Updating epoch to " + newEpoch + " and clearing cache") // mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]] mapStatuses.clear() - generation = newGen + epoch = newEpoch } } } def getSerializedLocations(shuffleId: Int): Array[Byte] = { var statuses: Array[MapStatus] = null - var generationGotten: Long = -1 - generationLock.synchronized { - if (generation > cacheGeneration) { + var epochGotten: Long = -1 + epochLock.synchronized { + if (epoch > cacheEpoch) { cachedSerializedStatuses.clear() - cacheGeneration = generation + cacheEpoch = epoch } cachedSerializedStatuses.get(shuffleId) match { case Some(bytes) => return bytes case None => statuses = mapStatuses(shuffleId) - generationGotten = generation + epochGotten = epoch } } // If we got here, we failed to find the serialized locations in the cache, so we pulled // out a snapshot of the locations as "locs"; let's serialize and return that val bytes = serializeStatuses(statuses) logInfo("Size of output statuses for shuffle %d is %d bytes".format(shuffleId, bytes.length)) - // Add them into the table only if the generation hasn't changed while we were working - generationLock.synchronized { - if (generation == generationGotten) { + // Add them into the table only if the epoch hasn't changed while we were working + epochLock.synchronized { + if (epoch == epochGotten) { cachedSerializedStatuses(shuffleId) = bytes } } diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 503ea6ccbf..f5767a3858 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -221,8 +221,8 @@ abstract class RDD[T: ClassManifest]( } /** - * Get the preferred location of a split, taking into account whether the - * RDD is checkpointed or not. + * Get the preferred locations of a partition (as hostnames), taking into account whether the + * RDD is checkpointed. */ final def preferredLocations(split: Partition): Seq[String] = { checkpointRDD.map(_.getPreferredLocations(split)).getOrElse { diff --git a/core/src/main/scala/spark/executor/Executor.scala b/core/src/main/scala/spark/executor/Executor.scala index 05a960d7c5..036c7191ad 100644 --- a/core/src/main/scala/spark/executor/Executor.scala +++ b/core/src/main/scala/spark/executor/Executor.scala @@ -32,8 +32,12 @@ import spark._ /** * The Mesos executor for Spark. */ -private[spark] class Executor(executorId: String, slaveHostname: String, properties: Seq[(String, String)]) extends Logging { - +private[spark] class Executor( + executorId: String, + slaveHostname: String, + properties: Seq[(String, String)]) + extends Logging +{ // Application dependencies (added through SparkContext) that we've fetched so far on this node. // Each map holds the master's timestamp for the version of that file or JAR we got. 
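
The MapOutputTracker changes above rename "generation" to "epoch" but keep the same protocol: the master bumps a counter under a lock whenever map outputs change, stamps it onto outgoing tasks, and a worker that sees a newer value clears its cached output locations. A stripped-down sketch of that bookkeeping (EpochTracker and its cache are illustrative; the real tracker also caches serialized statuses and talks to a tracker actor):

    import scala.collection.mutable.HashMap

    class EpochTracker {
      private var epoch: Long = 0
      private val epochLock = new java.lang.Object
      private val cachedStatuses = new HashMap[Int, String]  // stand-in for cached map output info

      // Master side: bump the epoch whenever registered map outputs change.
      def incrementEpoch() { epochLock.synchronized { epoch += 1 } }

      // Current epoch, stamped onto every task the master sends out.
      def getEpoch: Long = epochLock.synchronized { epoch }

      // Worker side: called with the epoch carried by an incoming task; a newer
      // value means cached output locations may be stale, so drop them.
      def updateEpoch(newEpoch: Long) {
        epochLock.synchronized {
          if (newEpoch > epoch) {
            cachedStatuses.clear()
            epoch = newEpoch
          }
        }
      }
    }

    object EpochTrackerDemo {
      def main(args: Array[String]) {
        val tracker = new EpochTracker
        tracker.incrementEpoch()
        tracker.updateEpoch(5)     // clears the stale cache and jumps to epoch 5
        println(tracker.getEpoch)  // 5
      }
    }
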
private val currentFiles: HashMap[String, Long] = new HashMap[String, Long]() @@ -125,8 +129,8 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert updateDependencies(taskFiles, taskJars) val task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader) attemptedTask = Some(task) - logInfo("Its generation is " + task.generation) - env.mapOutputTracker.updateGeneration(task.generation) + logInfo("Its epoch is " + task.epoch) + env.mapOutputTracker.updateEpoch(task.epoch) taskStart = System.currentTimeMillis() val value = task.run(taskId.toInt) val taskFinish = System.currentTimeMillis() diff --git a/core/src/main/scala/spark/rdd/BlockRDD.scala b/core/src/main/scala/spark/rdd/BlockRDD.scala index 0ebb722d73..03800584ae 100644 --- a/core/src/main/scala/spark/rdd/BlockRDD.scala +++ b/core/src/main/scala/spark/rdd/BlockRDD.scala @@ -28,13 +28,12 @@ private[spark] class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[String]) extends RDD[T](sc, Nil) { - @transient lazy val locations_ = BlockManager.blockIdsToExecutorLocations(blockIds, SparkEnv.get) + @transient lazy val locations_ = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get) override def getPartitions: Array[Partition] = (0 until blockIds.size).map(i => { new BlockRDDPartition(blockIds(i), i).asInstanceOf[Partition] }).toArray - override def compute(split: Partition, context: TaskContext): Iterator[T] = { val blockManager = SparkEnv.get.blockManager val blockId = split.asInstanceOf[BlockRDDPartition].blockId @@ -45,8 +44,8 @@ class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[St } } - override def getPreferredLocations(split: Partition): Seq[String] = + override def getPreferredLocations(split: Partition): Seq[String] = { locations_(split.asInstanceOf[BlockRDDPartition].blockId) - + } } diff --git a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala index 6a4fa13ad6..51f5cc3251 100644 --- a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala @@ -55,6 +55,8 @@ abstract class ZippedPartitionsBaseRDD[V: ClassManifest]( } override def getPreferredLocations(s: Partition): Seq[String] = { + // TODO(matei): Fix this for hostPort + // Note that as number of rdd's increase and/or number of slaves in cluster increase, the computed preferredLocations below // become diminishingly small : so we might need to look at alternate strategies to alleviate this. // If there are no (or very small number of preferred locations), we will end up transferred the blocks to 'any' node in the diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index fbf3f4c807..2f7e6d98f8 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -72,8 +72,8 @@ class DAGScheduler( } // Called by TaskScheduler when a host is added - override def executorGained(execId: String, hostPort: String) { - eventQueue.put(ExecutorGained(execId, hostPort)) + override def executorGained(execId: String, host: String) { + eventQueue.put(ExecutorGained(execId, host)) } // Called by TaskScheduler to cancel an entire TaskSet due to repeated failures. 
@@ -104,15 +104,16 @@ class DAGScheduler( private val listenerBus = new SparkListenerBus() - var cacheLocs = new HashMap[Int, Array[List[String]]] + // Contains the locations that each RDD's partitions are cached on + private val cacheLocs = new HashMap[Int, Array[Seq[TaskLocation]]] - // For tracking failed nodes, we use the MapOutputTracker's generation number, which is - // sent with every task. When we detect a node failing, we note the current generation number - // and failed executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask - // results. - // TODO: Garbage collect information about failure generations when we know there are no more + // For tracking failed nodes, we use the MapOutputTracker's epoch number, which is sent with + // every task. When we detect a node failing, we note the current epoch number and failed + // executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask results. + // + // TODO: Garbage collect information about failure epochs when we know there are no more // stray messages to detect. - val failedGeneration = new HashMap[String, Long] + val failedEpoch = new HashMap[String, Long] val idToActiveJob = new HashMap[Int, ActiveJob] @@ -141,11 +142,13 @@ class DAGScheduler( listenerBus.addListener(listener) } - private def getCacheLocs(rdd: RDD[_]): Array[List[String]] = { + private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = { if (!cacheLocs.contains(rdd.id)) { val blockIds = rdd.partitions.indices.map(index=> "rdd_%d_%d".format(rdd.id, index)).toArray - val locs = BlockManager.blockIdsToExecutorLocations(blockIds, env, blockManagerMaster) - cacheLocs(rdd.id) = blockIds.map(locs.getOrElse(_, Nil)) + val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster) + cacheLocs(rdd.id) = blockIds.map { id => + locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId)) + } } cacheLocs(rdd.id) } @@ -345,8 +348,8 @@ class DAGScheduler( submitStage(finalStage) } - case ExecutorGained(execId, hostPort) => - handleExecutorGained(execId, hostPort) + case ExecutorGained(execId, host) => + handleExecutorGained(execId, host) case ExecutorLost(execId) => handleExecutorLost(execId) @@ -508,7 +511,7 @@ class DAGScheduler( } else { // This is a final stage; figure out its job's missing partitions val job = resultStageToJob(stage) - for (id <- 0 until job.numPartitions if (!job.finished(id))) { + for (id <- 0 until job.numPartitions if !job.finished(id)) { val partition = job.partitions(id) val locs = getPreferredLocs(stage.rdd, partition) tasks += new ResultTask(stage.id, stage.rdd, job.func, partition, locs, id) @@ -518,7 +521,7 @@ class DAGScheduler( // should be "StageSubmitted" first and then "JobEnded" val properties = idToActiveJob(stage.priority).properties listenerBus.post(SparkListenerStageSubmitted(stage, tasks.size, properties)) - + if (tasks.size > 0) { // Preemptively serialize a task to make sure it can be serialized. 
We are catching this // exception here because it would be fairly hard to catch the non-serializable exception @@ -599,7 +602,7 @@ class DAGScheduler( val status = event.result.asInstanceOf[MapStatus] val execId = status.location.executorId logDebug("ShuffleMapTask finished on " + execId) - if (failedGeneration.contains(execId) && smt.generation <= failedGeneration(execId)) { + if (failedEpoch.contains(execId) && smt.epoch <= failedEpoch(execId)) { logInfo("Ignoring possibly bogus ShuffleMapTask completion from " + execId) } else { stage.addOutputLoc(smt.partition, status) @@ -611,11 +614,11 @@ class DAGScheduler( logInfo("waiting: " + waiting) logInfo("failed: " + failed) if (stage.shuffleDep != None) { - // We supply true to increment the generation number here in case this is a + // We supply true to increment the epoch number here in case this is a // recomputation of the map outputs. In that case, some nodes may have cached // locations with holes (from when we detected the error) and will need the - // generation incremented to refetch them. - // TODO: Only increment the generation number if this is not the first time + // epoch incremented to refetch them. + // TODO: Only increment the epoch number if this is not the first time // we registered these map outputs. mapOutputTracker.registerMapOutputs( stage.shuffleDep.get.shuffleId, @@ -674,7 +677,7 @@ class DAGScheduler( lastFetchFailureTime = System.currentTimeMillis() // TODO: Use pluggable clock // TODO: mark the executor as failed only if there were lots of fetch failures on it if (bmAddress != null) { - handleExecutorLost(bmAddress.executorId, Some(task.generation)) + handleExecutorLost(bmAddress.executorId, Some(task.epoch)) } case ExceptionFailure(className, description, stackTrace, metrics) => @@ -690,14 +693,14 @@ class DAGScheduler( * Responds to an executor being lost. This is called inside the event loop, so it assumes it can * modify the scheduler's internal state. Use executorLost() to post a loss event from outside. * - * Optionally the generation during which the failure was caught can be passed to avoid allowing + * Optionally the epoch during which the failure was caught can be passed to avoid allowing * stray fetch failures from possibly retriggering the detection of a node as lost. 
*/ - private def handleExecutorLost(execId: String, maybeGeneration: Option[Long] = None) { - val currentGeneration = maybeGeneration.getOrElse(mapOutputTracker.getGeneration) - if (!failedGeneration.contains(execId) || failedGeneration(execId) < currentGeneration) { - failedGeneration(execId) = currentGeneration - logInfo("Executor lost: %s (generation %d)".format(execId, currentGeneration)) + private def handleExecutorLost(execId: String, maybeEpoch: Option[Long] = None) { + val currentEpoch = maybeEpoch.getOrElse(mapOutputTracker.getEpoch) + if (!failedEpoch.contains(execId) || failedEpoch(execId) < currentEpoch) { + failedEpoch(execId) = currentEpoch + logInfo("Executor lost: %s (epoch %d)".format(execId, currentEpoch)) blockManagerMaster.removeExecutor(execId) // TODO: This will be really slow if we keep accumulating shuffle map stages for ((shuffleId, stage) <- shuffleToMapStage) { @@ -706,20 +709,20 @@ class DAGScheduler( mapOutputTracker.registerMapOutputs(shuffleId, locs, true) } if (shuffleToMapStage.isEmpty) { - mapOutputTracker.incrementGeneration() + mapOutputTracker.incrementEpoch() } clearCacheLocs() } else { logDebug("Additional executor lost message for " + execId + - "(generation " + currentGeneration + ")") + "(epoch " + currentEpoch + ")") } } - private def handleExecutorGained(execId: String, hostPort: String) { - // remove from failedGeneration(execId) ? - if (failedGeneration.contains(execId)) { - logInfo("Host gained which was in lost list earlier: " + hostPort) - failedGeneration -= execId + private def handleExecutorGained(execId: String, host: String) { + // remove from failedEpoch(execId) ? + if (failedEpoch.contains(execId)) { + logInfo("Host gained which was in lost list earlier: " + host) + failedEpoch -= execId } } @@ -774,16 +777,16 @@ class DAGScheduler( visitedRdds.contains(target.rdd) } - private def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = { + private def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = { // If the partition is cached, return the cache locations val cached = getCacheLocs(rdd)(partition) - if (cached != Nil) { + if (!cached.isEmpty) { return cached } // If the RDD has some placement preferences (as is the case for input RDDs), get those val rddPrefs = rdd.preferredLocations(rdd.partitions(partition)).toList - if (rddPrefs != Nil) { - return rddPrefs + if (!rddPrefs.isEmpty) { + return rddPrefs.map(host => TaskLocation(host)) } // If the RDD has narrow dependencies, pick the first partition of the first narrow dep // that has any placement preferences. 
Ideally we would choose based on transfer sizes, diff --git a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala b/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala index 3b4ee6287a..b8ba0e9239 100644 --- a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala +++ b/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala @@ -54,9 +54,7 @@ private[spark] case class CompletionEvent( taskMetrics: TaskMetrics) extends DAGSchedulerEvent -private[spark] case class ExecutorGained(execId: String, hostPort: String) extends DAGSchedulerEvent { - Utils.checkHostPort(hostPort, "Required hostport") -} +private[spark] case class ExecutorGained(execId: String, host: String) extends DAGSchedulerEvent private[spark] case class ExecutorLost(execId: String) extends DAGSchedulerEvent diff --git a/core/src/main/scala/spark/scheduler/ResultTask.scala b/core/src/main/scala/spark/scheduler/ResultTask.scala index 832ca18b8c..d066df5dc1 100644 --- a/core/src/main/scala/spark/scheduler/ResultTask.scala +++ b/core/src/main/scala/spark/scheduler/ResultTask.scala @@ -73,7 +73,7 @@ private[spark] class ResultTask[T, U]( var rdd: RDD[T], var func: (TaskContext, Iterator[T]) => U, var partition: Int, - @transient locs: Seq[String], + @transient locs: Seq[TaskLocation], val outputId: Int) extends Task[U](stageId) with Externalizable { @@ -85,11 +85,8 @@ private[spark] class ResultTask[T, U]( rdd.partitions(partition) } - private val preferredLocs: Seq[String] = if (locs == null) Nil else locs.toSet.toSeq - - { - // DEBUG code - preferredLocs.foreach (hostPort => Utils.checkHost(Utils.parseHostPort(hostPort)._1, "preferredLocs : " + preferredLocs)) + @transient private val preferredLocs: Seq[TaskLocation] = { + if (locs == null) Nil else locs.toSet.toSeq } override def run(attemptId: Long): U = { @@ -102,7 +99,7 @@ private[spark] class ResultTask[T, U]( } } - override def preferredLocations: Seq[String] = preferredLocs + override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString = "ResultTask(" + stageId + ", " + partition + ")" @@ -116,7 +113,7 @@ private[spark] class ResultTask[T, U]( out.write(bytes) out.writeInt(partition) out.writeInt(outputId) - out.writeLong(generation) + out.writeLong(epoch) out.writeObject(split) } } @@ -131,7 +128,7 @@ private[spark] class ResultTask[T, U]( func = func_.asInstanceOf[(TaskContext, Iterator[T]) => U] partition = in.readInt() val outputId = in.readInt() - generation = in.readLong() + epoch = in.readLong() split = in.readObject().asInstanceOf[Partition] } } diff --git a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala index e3bb6d1e60..2dbaef24ac 100644 --- a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala @@ -88,18 +88,15 @@ private[spark] class ShuffleMapTask( var rdd: RDD[_], var dep: ShuffleDependency[_,_], var partition: Int, - @transient private var locs: Seq[String]) + @transient private var locs: Seq[TaskLocation]) extends Task[MapStatus](stageId) with Externalizable with Logging { protected def this() = this(0, null, null, 0, null) - @transient private val preferredLocs: Seq[String] = if (locs == null) Nil else locs.toSet.toSeq - - { - // DEBUG code - preferredLocs.foreach (hostPort => Utils.checkHost(Utils.parseHostPort(hostPort)._1, "preferredLocs : " + preferredLocs)) + @transient private val preferredLocs: Seq[TaskLocation] = { + if (locs == null) Nil else locs.toSet.toSeq } 
var split = if (rdd == null) null else rdd.partitions(partition) @@ -112,7 +109,7 @@ private[spark] class ShuffleMapTask( out.writeInt(bytes.length) out.write(bytes) out.writeInt(partition) - out.writeLong(generation) + out.writeLong(epoch) out.writeObject(split) } } @@ -126,7 +123,7 @@ private[spark] class ShuffleMapTask( rdd = rdd_ dep = dep_ partition = in.readInt() - generation = in.readLong() + epoch = in.readLong() split = in.readObject().asInstanceOf[Partition] } @@ -186,7 +183,7 @@ private[spark] class ShuffleMapTask( } } - override def preferredLocations: Seq[String] = preferredLocs + override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString = "ShuffleMapTask(%d, %d)".format(stageId, partition) } diff --git a/core/src/main/scala/spark/scheduler/Task.scala b/core/src/main/scala/spark/scheduler/Task.scala index 50768d43e0..0ab2ae6cfe 100644 --- a/core/src/main/scala/spark/scheduler/Task.scala +++ b/core/src/main/scala/spark/scheduler/Task.scala @@ -30,9 +30,9 @@ import spark.executor.TaskMetrics */ private[spark] abstract class Task[T](val stageId: Int) extends Serializable { def run(attemptId: Long): T - def preferredLocations: Seq[String] = Nil + def preferredLocations: Seq[TaskLocation] = Nil - var generation: Long = -1 // Map output tracker generation. Will be set by TaskScheduler. + var epoch: Long = -1 // Map output tracker epoch. Will be set by TaskScheduler. var metrics: Option[TaskMetrics] = None diff --git a/core/src/main/scala/spark/scheduler/TaskLocation.scala b/core/src/main/scala/spark/scheduler/TaskLocation.scala new file mode 100644 index 0000000000..0e97c61188 --- /dev/null +++ b/core/src/main/scala/spark/scheduler/TaskLocation.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.scheduler + +/** + * A location where a task should run. This can either be a host or a (host, executorID) pair. + * In the latter case, we will prefer to launch the task on that executorID, but our next level + * of preference will be executors on the same host if this is not possible. 
+ */ +private[spark] +class TaskLocation private (val host: String, val executorId: Option[String]) extends Serializable + +private[spark] object TaskLocation { + def apply(host: String, executorId: String) = new TaskLocation(host, Some(executorId)) + + def apply(host: String) = new TaskLocation(host, None) +} diff --git a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala b/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala index 2cdeb1c8c0..64be50b2d0 100644 --- a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala +++ b/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala @@ -35,7 +35,7 @@ private[spark] trait TaskSchedulerListener { taskInfo: TaskInfo, taskMetrics: TaskMetrics): Unit // A node was added to the cluster. - def executorGained(execId: String, hostPort: String): Unit + def executorGained(execId: String, host: String): Unit // A node was lost from the cluster. def executorLost(execId: String): Unit diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala index 96568e0d27..036e36bca0 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala @@ -37,18 +37,22 @@ import java.util.{TimerTask, Timer} */ private[spark] class ClusterScheduler(val sc: SparkContext) extends TaskScheduler - with Logging { - + with Logging +{ // How often to check for speculative tasks val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong + // Threshold above which we warn user initial TaskSet may be starved val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong + // How often to revive offers in case there are pending tasks - that is how often to try to get // tasks scheduled in case there are nodes available : default 0 is to disable it - to preserve existing behavior - // Note that this is required due to delayed scheduling due to data locality waits, etc. - // TODO: rename property ? - val TASK_REVIVAL_INTERVAL = System.getProperty("spark.tasks.revive.interval", "0").toLong + // Note that this is required due to delay scheduling due to data locality waits, etc. + // TODO(matei): move to StandaloneSchedulerBackend? + val TASK_REVIVAL_INTERVAL = System.getProperty("spark.scheduler.revival.interval", "1000").toLong + // TODO(matei): replace this with something that only affects levels past PROCESS_LOCAL; + // basically it can be a "cliff" for locality /* This property controls how aggressive we should be to modulate waiting for node local task scheduling. To elaborate, currently there is a time limit (3 sec def) to ensure that spark attempts to wait for node locality of tasks before @@ -71,7 +75,8 @@ private[spark] class ClusterScheduler(val sc: SparkContext) If cluster is rack aware, then setting it to RACK_LOCAL gives best tradeoff and a 3x - 4x performance improvement while minimizing IO impact. Also, it brings down the variance in running time drastically. 
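
The TaskLocation class added above pairs a host with an optional executor ID, so a scheduler can prefer the exact executor that holds the data and fall back to any executor on the same host. A small standalone illustration of that matching (Loc, Offer and matchLevel are made-up names for the example, not part of the patch):

    // Mirrors the TaskLocation shape above: a host plus an optional executor ID.
    case class Loc(host: String, executorId: Option[String] = None)

    object LocDemo {
      case class Offer(execId: String, host: String)

      // Classify how well a single location preference matches a resource offer.
      def matchLevel(pref: Loc, offer: Offer): String = {
        if (pref.executorId == Some(offer.execId)) "process-local"
        else if (pref.host == offer.host) "node-local"
        else "non-local"
      }

      def main(args: Array[String]) {
        val offer = Offer("exec-7", "host1")
        val prefs = Seq(Loc("host1", Some("exec-7")), Loc("host1"), Loc("host2"))
        prefs.foreach(p => println(p + " -> " + matchLevel(p, offer)))
        // Loc(host1,Some(exec-7)) -> process-local
        // Loc(host1,None)         -> node-local
        // Loc(host2,None)         -> non-local
      }
    }
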
*/ - val TASK_SCHEDULING_AGGRESSION = TaskLocality.parse(System.getProperty("spark.tasks.schedule.aggression", "NODE_LOCAL")) + val TASK_SCHEDULING_AGGRESSION = TaskLocality.withName( + System.getProperty("spark.tasks.schedule.aggression", "NODE_LOCAL")) val activeTaskSets = new HashMap[String, TaskSetManager] @@ -89,16 +94,11 @@ private[spark] class ClusterScheduler(val sc: SparkContext) // Which executor IDs we have executors on val activeExecutorIds = new HashSet[String] - // TODO: We might want to remove this and merge it with execId datastructures - but later. - // Which hosts in the cluster are alive (contains hostPort's) - used for process local and node local task locality. - private val hostPortsAlive = new HashSet[String] - private val hostToAliveHostPorts = new HashMap[String, HashSet[String]] - // The set of executors we have on each host; this is used to compute hostsAlive, which // in turn is used to decide when we can attain data locality on a given host - private val executorsByHostPort = new HashMap[String, HashSet[String]] + private val executorsByHost = new HashMap[String, HashSet[String]] - private val executorIdToHostPort = new HashMap[String, String] + private val executorIdToHost = new HashMap[String, String] // JAR server, if any JARs were added by the user to the SparkContext var jarServer: HttpServer = null @@ -138,7 +138,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) schedulableBuilder.buildPools() // resolve executorId to hostPort mapping. def executorToHostPort(executorId: String, defaultHostPort: String): String = { - executorIdToHostPort.getOrElse(executorId, defaultHostPort) + executorIdToHost.getOrElse(executorId, defaultHostPort) } // Unfortunately, this means that SparkEnv is indirectly referencing ClusterScheduler @@ -146,13 +146,12 @@ private[spark] class ClusterScheduler(val sc: SparkContext) SparkEnv.get.executorIdToHostPort = Some(executorToHostPort) } - def newTaskId(): Long = nextTaskId.getAndIncrement() override def start() { backend.start() - if (JBoolean.getBoolean("spark.speculation")) { + if (System.getProperty("spark.speculation", "false").toBoolean) { new Thread("ClusterScheduler speculation check") { setDaemon(true) @@ -172,6 +171,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) // Change to always run with some default if TASK_REVIVAL_INTERVAL <= 0 ? + // TODO(matei): remove this thread if (TASK_REVIVAL_INTERVAL > 0) { new Thread("ClusterScheduler task offer revival check") { setDaemon(true) @@ -201,7 +201,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) taskSetTaskIds(taskSet.id) = new HashSet[Long]() - if (hasReceivedTask == false) { + if (!hasReceivedTask) { starvationTimer.scheduleAtFixedRate(new TimerTask() { override def run() { if (!hasLaunchedTask) { @@ -214,7 +214,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } }, STARVATION_TIMEOUT, STARVATION_TIMEOUT) } - hasReceivedTask = true; + hasReceivedTask = true } backend.reviveOffers() } @@ -235,172 +235,53 @@ private[spark] class ClusterScheduler(val sc: SparkContext) * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so * that tasks are balanced across the cluster. 
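
The rewritten resourceOffers that follows replaces the old per-locality bucketing with a much shorter loop: a task set is repeatedly offered every worker's free cores until it stops accepting, which spreads its tasks round-robin across the offers. A toy version of that loop, where tryLaunch stands in for TaskSetManager.resourceOffer and Offer for WorkerOffer (all names here are illustrative):

    import scala.collection.mutable.ArrayBuffer

    object OfferLoop {
      case class Offer(execId: String, host: String, cores: Int)

      // Repeatedly sweep over the offers until no more tasks can be placed.
      def schedule(offers: Seq[Offer], tryLaunch: (String, String) => Option[String]) = {
        val tasks = offers.map(_ => new ArrayBuffer[String])
        val availableCpus = offers.map(_.cores).toArray
        var launchedTask = true
        while (launchedTask) {
          launchedTask = false
          for (i <- 0 until offers.size if availableCpus(i) > 0) {
            for (task <- tryLaunch(offers(i).execId, offers(i).host)) {
              tasks(i) += task
              availableCpus(i) -= 1
              launchedTask = true
            }
          }
        }
        tasks
      }

      def main(args: Array[String]) {
        var remaining = 3
        val accept = (execId: String, host: String) =>
          if (remaining > 0) { remaining -= 1; Some("task-" + remaining + " on " + host) } else None
        // Three tasks over two 2-core workers: each sweep places one task per worker.
        println(schedule(Seq(Offer("e1", "host1", 2), Offer("e2", "host2", 2)), accept))
      }
    }
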
*/ - def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = { - synchronized { - SparkEnv.set(sc.env) - // Mark each slave as alive and remember its hostname - for (o <- offers) { - // DEBUG Code - Utils.checkHostPort(o.hostPort) + def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized { + SparkEnv.set(sc.env) - executorIdToHostPort(o.executorId) = o.hostPort - if (! executorsByHostPort.contains(o.hostPort)) { - executorsByHostPort(o.hostPort) = new HashSet[String]() - } - - hostPortsAlive += o.hostPort - hostToAliveHostPorts.getOrElseUpdate(Utils.parseHostPort(o.hostPort)._1, new HashSet[String]).add(o.hostPort) - executorGained(o.executorId, o.hostPort) + // Mark each slave as alive and remember its hostname + for (o <- offers) { + executorIdToHost(o.executorId) = o.host + if (!executorsByHost.contains(o.host)) { + executorsByHost(o.host) = new HashSet[String]() + executorGained(o.executorId, o.host) } - // Build a list of tasks to assign to each slave - val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) - // merge availableCpus into nodeToAvailableCpus block ? - val availableCpus = offers.map(o => o.cores).toArray - val nodeToAvailableCpus = { - val map = new HashMap[String, Int]() - for (offer <- offers) { - val hostPort = offer.hostPort - val cores = offer.cores - // DEBUG code - Utils.checkHostPort(hostPort) - - val host = Utils.parseHostPort(hostPort)._1 - - map.put(host, map.getOrElse(host, 0) + cores) - } - - map - } - var launchedTask = false - val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue() - - for (manager <- sortedTaskSetQueue) { - logDebug("parentName:%s, name:%s, runningTasks:%s".format( - manager.parent.name, manager.name, manager.runningTasks)) - } - - for (manager <- sortedTaskSetQueue) { - - // Split offers based on node local, rack local and off-rack tasks. - val processLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val nodeLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val rackLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val otherOffers = new HashMap[String, ArrayBuffer[Int]]() - - for (i <- 0 until offers.size) { - val hostPort = offers(i).hostPort - // DEBUG code - Utils.checkHostPort(hostPort) - - val numProcessLocalTasks = math.max(0, math.min(manager.numPendingTasksForHostPort(hostPort), availableCpus(i))) - if (numProcessLocalTasks > 0){ - val list = processLocalOffers.getOrElseUpdate(hostPort, new ArrayBuffer[Int]) - for (j <- 0 until numProcessLocalTasks) list += i - } - - val host = Utils.parseHostPort(hostPort)._1 - val numNodeLocalTasks = math.max(0, - // Remove process local tasks (which are also host local btw !) from this - math.min(manager.numPendingTasksForHost(hostPort) - numProcessLocalTasks, nodeToAvailableCpus(host))) - if (numNodeLocalTasks > 0){ - val list = nodeLocalOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - for (j <- 0 until numNodeLocalTasks) list += i - } - - val numRackLocalTasks = math.max(0, - // Remove node local tasks (which are also rack local btw !) from this - math.min(manager.numRackLocalPendingTasksForHost(hostPort) - numProcessLocalTasks - numNodeLocalTasks, nodeToAvailableCpus(host))) - if (numRackLocalTasks > 0){ - val list = rackLocalOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - for (j <- 0 until numRackLocalTasks) list += i - } - if (numNodeLocalTasks <= 0 && numRackLocalTasks <= 0){ - // add to others list - spread even this across cluster. 
- val list = otherOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - list += i - } - } - - val offersPriorityList = new ArrayBuffer[Int]( - processLocalOffers.size + nodeLocalOffers.size + rackLocalOffers.size + otherOffers.size) - - // First process local, then host local, then rack, then others - - // numNodeLocalOffers contains count of both process local and host offers. - val numNodeLocalOffers = { - val processLocalPriorityList = ClusterScheduler.prioritizeContainers(processLocalOffers) - offersPriorityList ++= processLocalPriorityList - - val nodeLocalPriorityList = ClusterScheduler.prioritizeContainers(nodeLocalOffers) - offersPriorityList ++= nodeLocalPriorityList - - processLocalPriorityList.size + nodeLocalPriorityList.size - } - val numRackLocalOffers = { - val rackLocalPriorityList = ClusterScheduler.prioritizeContainers(rackLocalOffers) - offersPriorityList ++= rackLocalPriorityList - rackLocalPriorityList.size - } - offersPriorityList ++= ClusterScheduler.prioritizeContainers(otherOffers) - - var lastLoop = false - val lastLoopIndex = TASK_SCHEDULING_AGGRESSION match { - case TaskLocality.NODE_LOCAL => numNodeLocalOffers - case TaskLocality.RACK_LOCAL => numRackLocalOffers + numNodeLocalOffers - case TaskLocality.ANY => offersPriorityList.size - } - - do { - launchedTask = false - var loopCount = 0 - for (i <- offersPriorityList) { - val execId = offers(i).executorId - val hostPort = offers(i).hostPort - - // If last loop and within the lastLoopIndex, expand scope - else use null (which will use default/existing) - val overrideLocality = if (lastLoop && loopCount < lastLoopIndex) TASK_SCHEDULING_AGGRESSION else null - - // If last loop, override waiting for host locality - we scheduled all local tasks already and there might be more available ... 
- loopCount += 1 - - manager.slaveOffer(execId, hostPort, availableCpus(i), overrideLocality) match { - case Some(task) => - tasks(i) += task - val tid = task.taskId - taskIdToTaskSetId(tid) = manager.taskSet.id - taskSetTaskIds(manager.taskSet.id) += tid - taskIdToExecutorId(tid) = execId - activeExecutorIds += execId - executorsByHostPort(hostPort) += execId - availableCpus(i) -= 1 - launchedTask = true - - case None => {} - } - } - // Loop once more - when lastLoop = true, then we try to schedule task on all nodes irrespective of - // data locality (we still go in order of priority : but that would not change anything since - // if data local tasks had been available, we would have scheduled them already) - if (lastLoop) { - // prevent more looping - launchedTask = false - } else if (!lastLoop && !launchedTask) { - // Do this only if TASK_SCHEDULING_AGGRESSION != NODE_LOCAL - if (TASK_SCHEDULING_AGGRESSION != TaskLocality.NODE_LOCAL) { - // fudge launchedTask to ensure we loop once more - launchedTask = true - // dont loop anymore - lastLoop = true - } - } - } while (launchedTask) - } - - if (tasks.size > 0) { - hasLaunchedTask = true - } - return tasks } + + // Build a list of tasks to assign to each slave + val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) + val availableCpus = offers.map(o => o.cores).toArray + val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue() + for (manager <- sortedTaskSetQueue) { + logDebug("parentName: %s, name: %s, runningTasks: %s".format( + manager.parent.name, manager.name, manager.runningTasks)) + } + + var launchedTask = false + for (manager <- sortedTaskSetQueue; offer <- offers) { + do { + launchedTask = false + for (i <- 0 until offers.size) { + val execId = offers(i).executorId + val host = offers(i).host + for (task <- manager.resourceOffer(execId, host, availableCpus(i))) { + tasks(i) += task + val tid = task.taskId + taskIdToTaskSetId(tid) = manager.taskSet.id + taskSetTaskIds(manager.taskSet.id) += tid + taskIdToExecutorId(tid) = execId + activeExecutorIds += execId + executorsByHost(host) += execId + availableCpus(i) -= 1 + launchedTask = true + } + } + } while (launchedTask) + } + + if (tasks.size > 0) { + hasLaunchedTask = true + } + return tasks } def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { @@ -514,7 +395,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) synchronized { if (activeExecutorIds.contains(executorId)) { - val hostPort = executorIdToHostPort(executorId) + val hostPort = executorIdToHost(executorId) logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason)) removeExecutor(executorId) failedExecutor = Some(executorId) @@ -535,52 +416,37 @@ private[spark] class ClusterScheduler(val sc: SparkContext) /** Remove an executor from all our data structures and mark it as lost */ private def removeExecutor(executorId: String) { + // TODO(matei): remove HostPort activeExecutorIds -= executorId - val hostPort = executorIdToHostPort(executorId) - if (hostPortsAlive.contains(hostPort)) { - // DEBUG Code - Utils.checkHostPort(hostPort) + val host = executorIdToHost(executorId) - hostPortsAlive -= hostPort - hostToAliveHostPorts.getOrElseUpdate(Utils.parseHostPort(hostPort)._1, new HashSet[String]).remove(hostPort) - } - - val execs = executorsByHostPort.getOrElse(hostPort, new HashSet) + val execs = executorsByHost.getOrElse(host, new HashSet) execs -= executorId if (execs.isEmpty) { - executorsByHostPort -= hostPort + executorsByHost -= host } - 
executorIdToHostPort -= executorId - rootPool.executorLost(executorId, hostPort) + executorIdToHost -= executorId + rootPool.executorLost(executorId, host) } - def executorGained(execId: String, hostPort: String) { - listener.executorGained(execId, hostPort) + def executorGained(execId: String, host: String) { + listener.executorGained(execId, host) } - def getExecutorsAliveOnHost(host: String): Option[Set[String]] = { - Utils.checkHost(host) - - val retval = hostToAliveHostPorts.get(host) - if (retval.isDefined) { - return Some(retval.get.toSet) - } - - None + def getExecutorsAliveOnHost(host: String): Option[Set[String]] = synchronized { + executorsByHost.get(host).map(_.toSet) } - def isExecutorAliveOnHostPort(hostPort: String): Boolean = { - // Even if hostPort is a host, it does not matter - it is just a specific check. - // But we do have to ensure that only hostPort get into hostPortsAlive ! - // So no check against Utils.checkHostPort - hostPortsAlive.contains(hostPort) + def hasExecutorsAliveOnHost(host: String): Boolean = synchronized { + executorsByHost.contains(host) + } + + def isExecutorAlive(execId: String): Boolean = synchronized { + activeExecutorIds.contains(execId) } // By default, rack is unknown def getRackForHost(value: String): Option[String] = None - - // By default, (cached) hosts for rack is unknown - def getCachedHostsForRack(rack: String): Option[Set[String]] = None } @@ -610,6 +476,7 @@ object ClusterScheduler { // order keyList based on population of value in map val keyList = _keyList.sortWith( + // TODO(matei): not sure why we're using getOrElse if keyList = map.keys... see if it matters (left, right) => map.get(left).getOrElse(Set()).size > map.get(right).getOrElse(Set()).size ) @@ -617,7 +484,7 @@ object ClusterScheduler { var index = 0 var found = true - while (found){ + while (found) { found = false for (key <- keyList) { val containerList: ArrayBuffer[T] = map.get(key).getOrElse(null) diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala index 7f855cd345..1947c516db 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala @@ -29,49 +29,13 @@ import scala.math.min import spark.{FetchFailed, Logging, Resubmitted, SparkEnv, Success, TaskEndReason, TaskState, Utils} import spark.{ExceptionFailure, SparkException, TaskResultTooBigFailure} import spark.TaskState.TaskState -import spark.scheduler.{ShuffleMapTask, Task, TaskResult, TaskSet} +import spark.scheduler._ +import scala.Some +import spark.FetchFailed +import spark.ExceptionFailure +import spark.TaskResultTooBigFailure -private[spark] object TaskLocality - extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY") with Logging { - - // process local is expected to be used ONLY within tasksetmanager for now. - val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value - - type TaskLocality = Value - - def isAllowed(constraint: TaskLocality, condition: TaskLocality): Boolean = { - - // Must not be the constraint. 
- assert (constraint != TaskLocality.PROCESS_LOCAL) - - constraint match { - case TaskLocality.NODE_LOCAL => - condition == TaskLocality.NODE_LOCAL - case TaskLocality.RACK_LOCAL => - condition == TaskLocality.NODE_LOCAL || condition == TaskLocality.RACK_LOCAL - // For anything else, allow - case _ => true - } - } - - def parse(str: String): TaskLocality = { - // better way to do this ? - try { - val retval = TaskLocality.withName(str) - // Must not specify PROCESS_LOCAL ! - assert (retval != TaskLocality.PROCESS_LOCAL) - retval - } catch { - case nEx: NoSuchElementException => { - logWarning("Invalid task locality specified '" + str + "', defaulting to NODE_LOCAL") - // default to preserve earlier behavior - NODE_LOCAL - } - } - } -} - /** * Schedules the tasks within a single TaskSet in the ClusterScheduler. */ @@ -113,28 +77,26 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: // Last time when we launched a preferred task (for delay scheduling) var lastPreferredLaunchTime = System.currentTimeMillis - // List of pending tasks for each node (process local to container). - // These collections are actually + // Set of pending tasks for each executor. These collections are actually // treated as stacks, in which new tasks are added to the end of the // ArrayBuffer and removed from the end. This makes it faster to detect // tasks that repeatedly fail because whenever a task failed, it is put // back at the head of the stack. They are also only cleaned up lazily; // when a task is launched, it remains in all the pending lists except // the one that it was launched from, but gets removed from them later. - private val pendingTasksForHostPort = new HashMap[String, ArrayBuffer[Int]] + private val pendingTasksForExecutor = new HashMap[String, ArrayBuffer[Int]] - // List of pending tasks for each node. - // Essentially, similar to pendingTasksForHostPort, except at host level + // Set of pending tasks for each host. Similar to pendingTasksForExecutor, + // but at host level. private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]] - // List of pending tasks for each node based on rack locality. - // Essentially, similar to pendingTasksForHost, except at rack level - private val pendingRackLocalTasksForHost = new HashMap[String, ArrayBuffer[Int]] + // Set of pending tasks for each rack -- similar to the above. + private val pendingTasksForRack = new HashMap[String, ArrayBuffer[Int]] - // List containing pending tasks with no locality preferences + // Set containing pending tasks with no locality preferences. val pendingTasksWithNoPrefs = new ArrayBuffer[Int] - // List containing all pending tasks (also used as a stack, as above) + // Set containing all pending tasks (also used as a stack, as above). val allPendingTasks = new ArrayBuffer[Int] // Tasks that can be speculated. Since these will be a small fraction of total @@ -144,13 +106,14 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: // Task index, start and finish time for each task attempt (indexed by task ID) val taskInfos = new HashMap[Long, TaskInfo] - // Did the job fail? + // Did the TaskSet fail? 
var failed = false var causeOfFailure = "" // How frequently to reprint duplicate exceptions in full, in milliseconds val EXCEPTION_PRINT_INTERVAL = System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong + // Map of recent exceptions (identified by string representation and // top stack frame) to duplicate count (how many times the same // exception has appeared) and time the full exception was @@ -158,11 +121,11 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: // exceptions automatically. val recentExceptions = HashMap[String, (Int, Long)]() - // Figure out the current map output tracker generation and set it on all tasks - val generation = sched.mapOutputTracker.getGeneration - logDebug("Generation for " + taskSet.id + ": " + generation) + // Figure out the current map output tracker epoch and set it on all tasks + val epoch = sched.mapOutputTracker.getEpoch + logDebug("Epoch for " + taskSet.id + ": " + epoch) for (t <- tasks) { - t.generation = generation + t.epoch = epoch } // Add all our tasks to the pending lists. We do this in reverse order @@ -171,166 +134,74 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: addPendingTask(i) } - // Note that it follows the hierarchy. - // if we search for NODE_LOCAL, the output will include PROCESS_LOCAL and - // if we search for RACK_LOCAL, it will include PROCESS_LOCAL & NODE_LOCAL - private def findPreferredLocations( - _taskPreferredLocations: Seq[String], - scheduler: ClusterScheduler, - taskLocality: TaskLocality.TaskLocality): HashSet[String] = - { - if (TaskLocality.PROCESS_LOCAL == taskLocality) { - // straight forward comparison ! Special case it. - val retval = new HashSet[String]() - scheduler.synchronized { - for (location <- _taskPreferredLocations) { - if (scheduler.isExecutorAliveOnHostPort(location)) { - retval += location - } - } - } - - return retval - } - - val taskPreferredLocations = { - if (TaskLocality.NODE_LOCAL == taskLocality) { - _taskPreferredLocations - } else { - assert (TaskLocality.RACK_LOCAL == taskLocality) - // Expand set to include all 'seen' rack local hosts. - // This works since container allocation/management happens within master - - // so any rack locality information is updated in msater. - // Best case effort, and maybe sort of kludge for now ... rework it later ? - val hosts = new HashSet[String] - _taskPreferredLocations.foreach(h => { - val rackOpt = scheduler.getRackForHost(h) - if (rackOpt.isDefined) { - val hostsOpt = scheduler.getCachedHostsForRack(rackOpt.get) - if (hostsOpt.isDefined) { - hosts ++= hostsOpt.get - } - } - - // Ensure that irrespective of what scheduler says, host is always added ! - hosts += h - }) - - hosts - } - } - - val retval = new HashSet[String] - scheduler.synchronized { - for (prefLocation <- taskPreferredLocations) { - val aliveLocationsOpt = scheduler.getExecutorsAliveOnHost(Utils.parseHostPort(prefLocation)._1) - if (aliveLocationsOpt.isDefined) { - retval ++= aliveLocationsOpt.get - } - } - } - - retval - } - - // Add a task to all the pending-task lists that it should be on. - private def addPendingTask(index: Int) { - // We can infer hostLocalLocations from rackLocalLocations by joining it against - // tasks(index).preferredLocations (with appropriate hostPort <-> host conversion). - // But not doing it for simplicity sake. If this becomes a performance issue, modify it. 
- val locs = tasks(index).preferredLocations - val processLocalLocations = findPreferredLocations(locs, sched, TaskLocality.PROCESS_LOCAL) - val hostLocalLocations = findPreferredLocations(locs, sched, TaskLocality.NODE_LOCAL) - val rackLocalLocations = findPreferredLocations(locs, sched, TaskLocality.RACK_LOCAL) - - if (rackLocalLocations.size == 0) { - // Current impl ensures this. - assert (processLocalLocations.size == 0) - assert (hostLocalLocations.size == 0) - pendingTasksWithNoPrefs += index - } else { - - // process local locality - for (hostPort <- processLocalLocations) { - // DEBUG Code - Utils.checkHostPort(hostPort) - - val hostPortList = pendingTasksForHostPort.getOrElseUpdate(hostPort, ArrayBuffer()) - hostPortList += index - } - - // host locality (includes process local) - for (hostPort <- hostLocalLocations) { - // DEBUG Code - Utils.checkHostPort(hostPort) - - val host = Utils.parseHostPort(hostPort)._1 - val hostList = pendingTasksForHost.getOrElseUpdate(host, ArrayBuffer()) - hostList += index - } - - // rack locality (includes process local and host local) - for (rackLocalHostPort <- rackLocalLocations) { - // DEBUG Code - Utils.checkHostPort(rackLocalHostPort) - - val rackLocalHost = Utils.parseHostPort(rackLocalHostPort)._1 - val list = pendingRackLocalTasksForHost.getOrElseUpdate(rackLocalHost, ArrayBuffer()) + /** + * Add a task to all the pending-task lists that it should be on. If readding is set, we are + * re-adding the task so only include it in each list if it's not already there. + */ + private def addPendingTask(index: Int, readding: Boolean = false) { + // Utility method that adds `index` to a list only if readding=false or it's not already there + def addTo(list: ArrayBuffer[Int]) { + if (!readding || !list.contains(index)) { list += index } } - allPendingTasks += index + var hadAliveLocations = false + for (loc <- tasks(index).preferredLocations) { + for (execId <- loc.executorId) { + if (sched.isExecutorAlive(execId)) { + addTo(pendingTasksForExecutor.getOrElseUpdate(execId, new ArrayBuffer)) + hadAliveLocations = true + } + } + if (sched.hasExecutorsAliveOnHost(loc.host)) { + addTo(pendingTasksForHost.getOrElseUpdate(loc.host, new ArrayBuffer)) + for (rack <- sched.getRackForHost(loc.host)) { + addTo(pendingTasksForRack.getOrElseUpdate(rack, new ArrayBuffer)) + } + hadAliveLocations = true + } + } + + if (!hadAliveLocations) { + // Even though the task might've had preferred locations, all of those hosts or executors + // are dead; put it in the no-prefs list so we can schedule it elsewhere right away. 
+ addTo(pendingTasksWithNoPrefs) + } + + addTo(allPendingTasks) } - // Return the pending tasks list for a given host port (process local), or an empty list if - // there is no map entry for that host - private def getPendingTasksForHostPort(hostPort: String): ArrayBuffer[Int] = { - // DEBUG Code - Utils.checkHostPort(hostPort) - pendingTasksForHostPort.getOrElse(hostPort, ArrayBuffer()) + /** + * Return the pending tasks list for a given executor ID, or an empty list if + * there is no map entry for that host + */ + private def getPendingTasksForExecutor(executorId: String): ArrayBuffer[Int] = { + pendingTasksForExecutor.getOrElse(executorId, ArrayBuffer()) } - // Return the pending tasks list for a given host, or an empty list if - // there is no map entry for that host - private def getPendingTasksForHost(hostPort: String): ArrayBuffer[Int] = { - val host = Utils.parseHostPort(hostPort)._1 + /** + * Return the pending tasks list for a given host, or an empty list if + * there is no map entry for that host + */ + private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { pendingTasksForHost.getOrElse(host, ArrayBuffer()) } - // Return the pending tasks (rack level) list for a given host, or an empty list if - // there is no map entry for that host - private def getRackLocalPendingTasksForHost(hostPort: String): ArrayBuffer[Int] = { - val host = Utils.parseHostPort(hostPort)._1 - pendingRackLocalTasksForHost.getOrElse(host, ArrayBuffer()) + /** + * Return the pending rack-local task list for a given rack, or an empty list if + * there is no map entry for that rack + */ + private def getPendingTasksForRack(rack: String): ArrayBuffer[Int] = { + pendingTasksForRack.getOrElse(rack, ArrayBuffer()) } - // Number of pending tasks for a given host Port (which would be process local) - override def numPendingTasksForHostPort(hostPort: String): Int = { - getPendingTasksForHostPort(hostPort).count { index => - copiesRunning(index) == 0 && !finished(index) - } - } - - // Number of pending tasks for a given host (which would be data local) - override def numPendingTasksForHost(hostPort: String): Int = { - getPendingTasksForHost(hostPort).count { index => - copiesRunning(index) == 0 && !finished(index) - } - } - - // Number of pending rack local tasks for a given host - override def numRackLocalPendingTasksForHost(hostPort: String): Int = { - getRackLocalPendingTasksForHost(hostPort).count { index => - copiesRunning(index) == 0 && !finished(index) - } - } - - - // Dequeue a pending task from the given list and return its index. - // Return None if the list is empty. - // This method also cleans up any tasks in the list that have already - // been launched, since we want that to happen lazily. + /** + * Dequeue a pending task from the given list and return its index. + * Return None if the list is empty. + * This method also cleans up any tasks in the list that have already + * been launched, since we want that to happen lazily. + */ private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = { while (!list.isEmpty) { val index = list.last @@ -342,176 +213,145 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: return None } - // Return a speculative task for a given host if any are available. The task should not have an - // attempt running on this host, in case the host is slow. In addition, if locality is set, the - // task must have a preference for this host/rack/no preferred locations at all. 
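Editor's note: findTaskFromList, shown just above, is a lazy-cleanup dequeue; stale entries are only discarded when they are popped. A minimal standalone sketch of that pattern (the predicate stands in for the real check, which tests copiesRunning(index) == 0 && !finished(index)):

    import scala.collection.mutable.ArrayBuffer

    // Pop indices off the end of the list, silently dropping any that no
    // longer need to run; return the first index that is still runnable.
    def dequeueRunnable(list: ArrayBuffer[Int], stillNeeded: Int => Boolean): Option[Int] = {
      while (!list.isEmpty) {
        val index = list.last
        list.trimEnd(1)               // remove the entry we just inspected
        if (stillNeeded(index)) {
          return Some(index)
        }
      }
      None
    }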
- private def findSpeculativeTask(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = { + /** Check whether a task is currently running an attempt on a given host */ + private def hasAttemptOnHost(taskIndex: Int, host: String): Boolean = { + !taskAttempts(taskIndex).exists(_.host == host) + } - assert (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) + /** + * Return a speculative task for a given executor if any are available. The task should not have + * an attempt running on this host, in case the host is slow. In addition, the task should meet + * the given locality constraint. + */ + private def findSpeculativeTask(execId: String, host: String, locality: TaskLocality.Value) + : Option[(Int, TaskLocality.Value)] = + { speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set - if (speculatableTasks.size > 0) { - val localTask = speculatableTasks.find { index => - val locations = findPreferredLocations(tasks(index).preferredLocations, sched, - TaskLocality.NODE_LOCAL) - val attemptLocs = taskAttempts(index).map(_.hostPort) - (locations.size == 0 || locations.contains(hostPort)) && !attemptLocs.contains(hostPort) + if (!speculatableTasks.isEmpty) { + // Check for process-local or preference-less tasks; note that tasks can be process-local + // on multiple nodes when we replicate cached blocks, as in Spark Streaming + for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { + val prefs = tasks(index).preferredLocations + val executors = prefs.flatMap(_.executorId) + if (prefs.size == 0 || executors.contains(execId)) { + speculatableTasks -= index + return Some((index, TaskLocality.PROCESS_LOCAL)) + } } - if (localTask != None) { - speculatableTasks -= localTask.get - return localTask + // Check for node-local tasks + if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) { + for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { + val locations = tasks(index).preferredLocations.map(_.host) + if (locations.contains(host)) { + speculatableTasks -= index + return Some((index, TaskLocality.NODE_LOCAL)) + } + } } - // check for rack locality + // Check for rack-local tasks if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { - val rackTask = speculatableTasks.find { index => - val locations = findPreferredLocations(tasks(index).preferredLocations, sched, - TaskLocality.RACK_LOCAL) - val attemptLocs = taskAttempts(index).map(_.hostPort) - locations.contains(hostPort) && !attemptLocs.contains(hostPort) - } - - if (rackTask != None) { - speculatableTasks -= rackTask.get - return rackTask + for (rack <- sched.getRackForHost(host)) { + for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { + val racks = tasks(index).preferredLocations.map(_.host).map(sched.getRackForHost) + if (racks.contains(rack)) { + speculatableTasks -= index + return Some((index, TaskLocality.RACK_LOCAL)) + } + } } } - // Any task ... + // Check for non-local tasks if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { - // Check for attemptLocs also ? - val nonLocalTask = speculatableTasks.find { i => - !taskAttempts(i).map(_.hostPort).contains(hostPort) - } - if (nonLocalTask != None) { - speculatableTasks -= nonLocalTask.get - return nonLocalTask + for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { + speculatableTasks -= index + return Some((index, TaskLocality.ANY)) } } } + return None } - // Dequeue a pending task for a given node and return its index. 
- // If localOnly is set to false, allow non-local tasks as well. - private def findTask(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = { - val processLocalTask = findTaskFromList(getPendingTasksForHostPort(hostPort)) - if (processLocalTask != None) { - return processLocalTask + /** + * Dequeue a pending task for a given node and return its index and locality level. + * Only search for tasks matching the given locality constraint. + */ + private def findTask(execId: String, host: String, locality: TaskLocality.Value) + : Option[(Int, TaskLocality.Value)] = + { + for (index <- findTaskFromList(getPendingTasksForExecutor(execId))) { + return Some((index, TaskLocality.PROCESS_LOCAL)) } - val localTask = findTaskFromList(getPendingTasksForHost(hostPort)) - if (localTask != None) { - return localTask - } - - if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { - val rackLocalTask = findTaskFromList(getRackLocalPendingTasksForHost(hostPort)) - if (rackLocalTask != None) { - return rackLocalTask + if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) { + for (index <- findTaskFromList(getPendingTasksForHost(host))) { + return Some((index, TaskLocality.NODE_LOCAL)) } } - // Look for no pref tasks AFTER rack local tasks - this has side effect that we will get to - // failed tasks later rather than sooner. - // TODO: That code path needs to be revisited (adding to no prefs list when host:port goes down). - val noPrefTask = findTaskFromList(pendingTasksWithNoPrefs) - if (noPrefTask != None) { - return noPrefTask + if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { + for { + rack <- sched.getRackForHost(host) + index <- findTaskFromList(getPendingTasksForRack(rack)) + } { + return Some((index, TaskLocality.RACK_LOCAL)) + } + } + + // Look for no-pref tasks after rack-local tasks since they can run anywhere. + for (index <- findTaskFromList(pendingTasksWithNoPrefs)) { + return Some((index, TaskLocality.PROCESS_LOCAL)) } if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { - val nonLocalTask = findTaskFromList(allPendingTasks) - if (nonLocalTask != None) { - return nonLocalTask + for (index <- findTaskFromList(allPendingTasks)) { + return Some((index, TaskLocality.ANY)) } } // Finally, if all else has failed, find a speculative task - return findSpeculativeTask(hostPort, locality) + return findSpeculativeTask(execId, host, locality) } - private def isProcessLocalLocation(task: Task[_], hostPort: String): Boolean = { - Utils.checkHostPort(hostPort) - - val locs = task.preferredLocations - - locs.contains(hostPort) - } - - private def isHostLocalLocation(task: Task[_], hostPort: String): Boolean = { - val locs = task.preferredLocations - - // If no preference, consider it as host local - if (locs.isEmpty) return true - - val host = Utils.parseHostPort(hostPort)._1 - locs.find(h => Utils.parseHostPort(h)._1 == host).isDefined - } - - // Does a host count as a rack local preferred location for a task? - // (assumes host is NOT preferred location). - // This is true if either the task has preferred locations and this host is one, or it has - // no preferred locations (in which we still count the launch as preferred). 
- private def isRackLocalLocation(task: Task[_], hostPort: String): Boolean = { - - val locs = task.preferredLocations - - val preferredRacks = new HashSet[String]() - for (preferredHost <- locs) { - val rack = sched.getRackForHost(preferredHost) - if (None != rack) preferredRacks += rack.get - } - - if (preferredRacks.isEmpty) return false - - val hostRack = sched.getRackForHost(hostPort) - - return None != hostRack && preferredRacks.contains(hostRack.get) - } - - // Respond to an offer of a single slave from the scheduler by finding a task - override def slaveOffer( - execId: String, - hostPort: String, - availableCpus: Double, - overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = + /** + * Respond to an offer of a single slave from the scheduler by finding a task + */ + override def resourceOffer(execId: String, host: String, availableCpus: Double) + : Option[TaskDescription] = { if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { + val curTime = System.currentTimeMillis + // If explicitly specified, use that - val locality = if (overrideLocality != null) overrideLocality else { + val locality = { // expand only if we have waited for more than LOCALITY_WAIT for a host local task ... - val time = System.currentTimeMillis - if (time - lastPreferredLaunchTime < LOCALITY_WAIT) { + // TODO(matei): Multi-level delay scheduling + if (curTime - lastPreferredLaunchTime < LOCALITY_WAIT) { TaskLocality.NODE_LOCAL } else { TaskLocality.ANY } } - findTask(hostPort, locality) match { - case Some(index) => { - // Found a task; do some bookkeeping and return a Mesos task for it + findTask(execId, host, locality) match { + case Some((index, taskLocality)) => { + // Found a task; do some bookkeeping and return a task description val task = tasks(index) val taskId = sched.newTaskId() // Figure out whether this should count as a preferred launch - val taskLocality = - if (isProcessLocalLocation(task, hostPort)) TaskLocality.PROCESS_LOCAL - else if (isHostLocalLocation(task, hostPort)) TaskLocality.NODE_LOCAL - else if (isRackLocalLocation(task, hostPort)) TaskLocality.RACK_LOCAL - else TaskLocality.ANY - val prefStr = taskLocality.toString logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format( - taskSet.id, index, taskId, execId, hostPort, prefStr)) + taskSet.id, index, taskId, execId, host, taskLocality)) // Do various bookkeeping copiesRunning(index) += 1 - val time = System.currentTimeMillis - val info = new TaskInfo(taskId, index, time, execId, hostPort, taskLocality) + val info = new TaskInfo(taskId, index, curTime, execId, host, taskLocality) taskInfos(taskId) = info taskAttempts(index) = info :: taskAttempts(index) if (taskLocality == TaskLocality.PROCESS_LOCAL || taskLocality == TaskLocality.NODE_LOCAL) { - lastPreferredLaunchTime = time + lastPreferredLaunchTime = curTime } // Serialize and return the task val startTime = System.currentTimeMillis @@ -534,6 +374,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: return None } + /** Called by cluster scheduler when one of our tasks changes state */ override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { SparkEnv.set(env) state match { @@ -566,7 +407,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: if (!finished(index)) { tasksFinished += 1 logInfo("Finished TID %s in %d ms on %s (progress: %d/%d)".format( - tid, info.duration, info.hostPort, tasksFinished, numTasks)) + tid, info.duration, 
info.host, tasksFinished, numTasks)) // Deserialize task result and pass it to the scheduler try { val result = ser.deserialize[TaskResult[_]](serializedData) @@ -698,44 +539,33 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: } } - // TODO(xiajunluan): for now we just find Pool not TaskSetManager - // we can extend this function in future if needed override def getSchedulableByName(name: String): Schedulable = { return null } - override def addSchedulable(schedulable:Schedulable) { - // nothing - } + override def addSchedulable(schedulable: Schedulable) {} - override def removeSchedulable(schedulable:Schedulable) { - // nothing - } + override def removeSchedulable(schedulable: Schedulable) {} override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { - var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager] + var sortedTaskSetQueue = ArrayBuffer[TaskSetManager](this) sortedTaskSetQueue += this return sortedTaskSetQueue } - override def executorLost(execId: String, hostPort: String) { + /** Called by cluster scheduler when an executor is lost so we can re-enqueue our tasks */ + override def executorLost(execId: String, host: String) { logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id) - // If some task has preferred locations only on hostname, and there are no more executors there, - // put it in the no-prefs list to avoid the wait from delay scheduling - - // host local tasks - should we push this to rack local or no pref list ? For now, preserving - // behavior and moving to no prefs list. Note, this was done due to impliations related to - // 'waiting' for data local tasks, etc. - // Note: NOT checking process local list - since host local list is super set of that. We need - // to ad to no prefs only if there is no host local node for the task (not if there is no - // process local node for the task) - for (index <- getPendingTasksForHost(Utils.parseHostPort(hostPort)._1)) { - val newLocs = findPreferredLocations( - tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL) - if (newLocs.isEmpty) { - pendingTasksWithNoPrefs += index - } + // Re-enqueue pending tasks for this host based on the status of the cluster -- for example, a + // task that used to have locations on only this host might now go to the no-prefs list. Note + // that it's okay if we add a task to the same queue twice (if it had multiple preferred + // locations), because findTaskFromList will skip already-running tasks. 
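Editor's note: a small illustration (indices invented) of why the re-enqueue calls just below are safe to repeat: addPendingTask's addTo helper only appends an index when readding is false or the index is absent, so re-adding never duplicates entries.

    import scala.collection.mutable.ArrayBuffer

    val pending = ArrayBuffer(3, 7)               // indices already queued for some host
    def addTo(list: ArrayBuffer[Int], index: Int, readding: Boolean) {
      if (!readding || !list.contains(index)) { list += index }
    }
    addTo(pending, 7, readding = true)            // no-op: 7 is already pending
    addTo(pending, 9, readding = true)            // appended: 9 was not pending here
    // pending == ArrayBuffer(3, 7, 9)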
+ for (index <- getPendingTasksForExecutor(execId)) { + addPendingTask(index, readding=true) + } + for (index <- getPendingTasksForHost(host)) { + addPendingTask(index, readding=true) } // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage @@ -789,7 +619,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: !speculatableTasks.contains(index)) { logInfo( "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format( - taskSet.id, index, info.hostPort, threshold)) + taskSet.id, index, info.host, threshold)) speculatableTasks += index foundTasks = true } diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 075a7cbf7e..3b49af1258 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -37,7 +37,9 @@ import spark.scheduler.cluster.StandaloneClusterMessages._ */ private[spark] class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: ActorSystem) - extends SchedulerBackend with Logging { + extends SchedulerBackend with Logging +{ + // TODO(matei): periodically revive offers as in MesosScheduler // Use an atomic variable to track total number of cores in the cluster for simplicity and speed var totalCoreCount = new AtomicInteger(0) @@ -45,7 +47,7 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor { private val executorActor = new HashMap[String, ActorRef] private val executorAddress = new HashMap[String, Address] - private val executorHostPort = new HashMap[String, String] + private val executorHost = new HashMap[String, String] private val freeCores = new HashMap[String, Int] private val actorToExecutorId = new HashMap[ActorRef, String] private val addressToExecutorId = new HashMap[Address, String] @@ -65,7 +67,7 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor sender ! 
RegisteredExecutor(sparkProperties) context.watch(sender) executorActor(executorId) = sender - executorHostPort(executorId) = hostPort + executorHost(executorId) = Utils.parseHostPort(hostPort)._1 freeCores(executorId) = cores executorAddress(executorId) = sender.path.address actorToExecutorId(sender) = executorId @@ -105,13 +107,13 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor // Make fake resource offers on all executors def makeOffers() { launchTasks(scheduler.resourceOffers( - executorHostPort.toArray.map {case (id, hostPort) => new WorkerOffer(id, hostPort, freeCores(id))})) + executorHost.toArray.map {case (id, host) => new WorkerOffer(id, host, freeCores(id))})) } // Make fake resource offers on just one executor def makeOffers(executorId: String) { launchTasks(scheduler.resourceOffers( - Seq(new WorkerOffer(executorId, executorHostPort(executorId), freeCores(executorId))))) + Seq(new WorkerOffer(executorId, executorHost(executorId), freeCores(executorId))))) } // Launch tasks returned by a set of resource offers @@ -130,9 +132,8 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor actorToExecutorId -= executorActor(executorId) addressToExecutorId -= executorAddress(executorId) executorActor -= executorId - executorHostPort -= executorId + executorHost -= executorId freeCores -= executorId - executorHostPort -= executorId totalCoreCount.addAndGet(-numCores) scheduler.executorLost(executorId, SlaveLost(reason)) } diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala b/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala index c693b722ac..c2c5522686 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala @@ -28,11 +28,9 @@ class TaskInfo( val index: Int, val launchTime: Long, val executorId: String, - val hostPort: String, + val host: String, val taskLocality: TaskLocality.TaskLocality) { - Utils.checkHostPort(hostPort, "Expected hostport") - var finishTime: Long = 0 var failed = false diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskLocality.scala b/core/src/main/scala/spark/scheduler/cluster/TaskLocality.scala new file mode 100644 index 0000000000..1c33e41f87 --- /dev/null +++ b/core/src/main/scala/spark/scheduler/cluster/TaskLocality.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.scheduler.cluster + + +private[spark] object TaskLocality + extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY") +{ + // process local is expected to be used ONLY within tasksetmanager for now. 
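Editor's note: because the values declared just below go from most to least local, Enumeration's ordering lets isAllowed(constraint, condition) be read as "condition is at least as local as the constraint". A few illustrative checks (REPL-style, not part of the patch):

    import spark.scheduler.cluster.TaskLocality._

    isAllowed(RACK_LOCAL, NODE_LOCAL)   // true: a node-local task satisfies a rack-local constraint
    isAllowed(NODE_LOCAL, RACK_LOCAL)   // false: rack-local is too loose when only NODE_LOCAL is allowed
    isAllowed(ANY, PROCESS_LOCAL)       // true: everything is allowed under ANY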
+ val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value + + type TaskLocality = Value + + def isAllowed(constraint: TaskLocality, condition: TaskLocality): Boolean = { + condition <= constraint + } +} diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala index 1a92a5ed6f..277654edc0 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala @@ -29,17 +29,8 @@ private[spark] trait TaskSetManager extends Schedulable { def taskSet: TaskSet - def slaveOffer( - execId: String, - hostPort: String, - availableCpus: Double, - overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] - - def numPendingTasksForHostPort(hostPort: String): Int - - def numRackLocalPendingTasksForHost(hostPort: String): Int - - def numPendingTasksForHost(hostPort: String): Int + def resourceOffer(execId: String, hostPort: String, availableCpus: Double) + : Option[TaskDescription] def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) diff --git a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala b/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala index 06d1203f70..1d09bd9b03 100644 --- a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala +++ b/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala @@ -21,5 +21,4 @@ package spark.scheduler.cluster * Represents free resources available on an executor. */ private[spark] -class WorkerOffer(val executorId: String, val hostPort: String, val cores: Int) { -} +class WorkerOffer(val executorId: String, val host: String, val cores: Int) diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala index 6c43928bc8..a4f5f46777 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala @@ -141,7 +141,8 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: for (manager <- sortedTaskSetQueue) { do { launchTask = false - manager.slaveOffer(null, null, freeCpuCores) match { + // TODO(matei): don't pass null here? 
+ manager.resourceOffer(null, null, freeCpuCores) match { case Some(task) => tasks += task taskIdToTaskSetId(task.taskId) = manager.taskSet.id diff --git a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala b/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala index c38eeb9e11..698c777bec 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala @@ -98,14 +98,11 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas return None } - override def slaveOffer( - execId: String, - hostPort: String, - availableCpus: Double, - overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = + override def resourceOffer(execId: String, host: String, availableCpus: Double) + : Option[TaskDescription] = { SparkEnv.set(sched.env) - logDebug("availableCpus:%d,numFinished:%d,numTasks:%d".format( + logDebug("availableCpus:%d, numFinished:%d, numTasks:%d".format( availableCpus.toInt, numFinished, numTasks)) if (availableCpus > 0 && numFinished < numTasks) { findTask() match { @@ -131,18 +128,6 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas return None } - override def numPendingTasksForHostPort(hostPort: String): Int = { - return 0 - } - - override def numRackLocalPendingTasksForHost(hostPort :String): Int = { - return 0 - } - - override def numPendingTasksForHost(hostPort: String): Int = { - return 0 - } - override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { SparkEnv.set(env) state match { diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/spark/storage/BlockManager.scala index 3a72474419..2a6ec2a55d 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/spark/storage/BlockManager.scala @@ -1004,43 +1004,43 @@ private[spark] object BlockManager extends Logging { } } - def blockIdsToExecutorLocations(blockIds: Array[String], env: SparkEnv, blockManagerMaster: BlockManagerMaster = null): HashMap[String, List[String]] = { + def blockIdsToBlockManagers( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[BlockManagerId]] = + { // env == null and blockManagerMaster != null is used in tests assert (env != null || blockManagerMaster != null) - val locationBlockIds: Seq[Seq[BlockManagerId]] = - if (env != null) { - env.blockManager.getLocationBlockIds(blockIds) - } else { - blockManagerMaster.getLocations(blockIds) - } - - // Convert from block master locations to executor locations (we need that for task scheduling) - val executorLocations = new HashMap[String, List[String]]() - for (i <- 0 until blockIds.length) { - val blockId = blockIds(i) - val blockLocations = locationBlockIds(i) - - val executors = new HashSet[String]() - - if (env != null) { - for (bkLocation <- blockLocations) { - val executorHostPort = env.resolveExecutorIdToHostPort(bkLocation.executorId, bkLocation.host) - executors += executorHostPort - // logInfo("bkLocation = " + bkLocation + ", executorHostPort = " + executorHostPort) - } - } else { - // Typically while testing, etc - revert to simply using host. 
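Editor's note: a hypothetical call site (block IDs invented, assumes code inside the spark package) for the replacement helpers introduced just below; passing SparkEnv.get and leaving blockManagerMaster at its null default is the normal, non-test path:

    import spark.SparkEnv
    import spark.storage.BlockManager

    val blockIds = Array("rdd_0_0", "rdd_0_1")
    val hostsPerBlock: Map[String, Seq[String]] =
      BlockManager.blockIdsToHosts(blockIds, SparkEnv.get)
    val execsPerBlock: Map[String, Seq[String]] =
      BlockManager.blockIdsToExecutorIds(blockIds, SparkEnv.get)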
- for (bkLocation <- blockLocations) { - executors += bkLocation.host - // logInfo("bkLocation = " + bkLocation + ", executorHostPort = " + executorHostPort) - } - } - - executorLocations.put(blockId, executors.toSeq.toList) + val blockLocations: Seq[Seq[BlockManagerId]] = if (env != null) { + env.blockManager.getLocationBlockIds(blockIds) + } else { + blockManagerMaster.getLocations(blockIds) } - executorLocations + val blockManagers = new HashMap[String, Seq[BlockManagerId]] + for (i <- 0 until blockIds.length) { + blockManagers(blockIds(i)) = blockLocations(i) + } + blockManagers.toMap } + def blockIdsToExecutorIds( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[String]] = + { + blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.executorId)) + } + + def blockIdsToHosts( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[String]] = + { + blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.host)) + } } diff --git a/core/src/main/scala/spark/ui/jobs/StagePage.scala b/core/src/main/scala/spark/ui/jobs/StagePage.scala index 797513f266..6948ea4dd9 100644 --- a/core/src/main/scala/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/spark/ui/jobs/StagePage.scala @@ -156,7 +156,7 @@ private[spark] class StagePage(parent: JobProgressUI) { {info.taskId} {info.status} {info.taskLocality} - {info.hostPort} + {info.host} {dateFmt.format(new Date(info.launchTime))} {formatDuration} diff --git a/core/src/test/scala/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/spark/MapOutputTrackerSuite.scala index ce6cec0451..c21f3331d0 100644 --- a/core/src/test/scala/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/spark/MapOutputTrackerSuite.scala @@ -112,22 +112,22 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { "akka://spark@localhost:" + boundPort + "/user/MapOutputTracker") masterTracker.registerShuffle(10, 1) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } val compressedSize1000 = MapOutputTracker.compressSize(1000L) val size1000 = MapOutputTracker.decompressSize(compressedSize1000) masterTracker.registerMapOutput(10, 0, new MapStatus( BlockManagerId("a", "hostA", 1000, 0), Array(compressedSize1000))) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) assert(slaveTracker.getServerStatuses(10, 0).toSeq === Seq((BlockManagerId("a", "hostA", 1000, 0), size1000))) masterTracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0)) - masterTracker.incrementGeneration() - slaveTracker.updateGeneration(masterTracker.getGeneration) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) } // failure should be cached diff --git a/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala index 05afcd6567..6327155157 100644 --- a/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala @@ -72,7 
+72,9 @@ class DummyTaskSetManager( override def executorLost(executorId: String, host: String): Unit = { } - override def slaveOffer(execId: String, host: String, avaiableCpus: Double, overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = { + override def resourceOffer(execId: String, host: String, availableCpus: Double) + : Option[TaskDescription] = + { if (tasksFinished + runningTasks < numTasks) { increaseRunningTasks(1) return Some(new TaskDescription(0, execId, "task 0:0", null)) @@ -118,7 +120,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging logInfo("parentName:%s, parent running tasks:%d, name:%s,runningTasks:%d".format(manager.parent.name, manager.parent.runningTasks, manager.name, manager.runningTasks)) } for (taskSet <- taskSetQueue) { - taskSet.slaveOffer("execId_1", "hostname_1", 1) match { + taskSet.resourceOffer("execId_1", "hostname_1", 1) match { case Some(task) => return taskSet.stageId case None => {} diff --git a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala index caaf3209fd..3b4a0d52fc 100644 --- a/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/DAGSchedulerSuite.scala @@ -59,7 +59,7 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont override def stop() = {} override def submitTasks(taskSet: TaskSet) = { // normally done by TaskSetManager - taskSet.tasks.foreach(_.generation = mapOutputTracker.getGeneration) + taskSet.tasks.foreach(_.epoch = mapOutputTracker.getEpoch) taskSets += taskSet } override def setListener(listener: TaskSchedulerListener) = {} @@ -299,10 +299,10 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont val reduceRdd = makeRdd(2, List(shuffleDep)) submit(reduceRdd, Array(0, 1)) // pretend we were told hostA went away - val oldGeneration = mapOutputTracker.getGeneration + val oldEpoch = mapOutputTracker.getEpoch runEvent(ExecutorLost("exec-hostA")) - val newGeneration = mapOutputTracker.getGeneration - assert(newGeneration > oldGeneration) + val newEpoch = mapOutputTracker.getEpoch + assert(newEpoch > oldEpoch) val noAccum = Map[Long, Any]() val taskSet = taskSets(0) // should be ignored for being too old @@ -311,8 +311,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostB", 1), noAccum, null, null)) // should be ignored for being too old runEvent(CompletionEvent(taskSet.tasks(0), Success, makeMapStatus("hostA", 1), noAccum, null, null)) - // should work because it's a new generation - taskSet.tasks(1).generation = newGeneration + // should work because it's a new epoch + taskSet.tasks(1).epoch = newEpoch runEvent(CompletionEvent(taskSet.tasks(1), Success, makeMapStatus("hostA", 1), noAccum, null, null)) assert(mapOutputTracker.getServerStatuses(shuffleId, 0).map(_._1) === Array(makeBlockManagerId("hostB"), makeBlockManagerId("hostA"))) @@ -401,12 +401,14 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont assert(results === Map(0 -> 42)) } - /** Assert that the supplied TaskSet has exactly the given preferredLocations. Note, converts taskSet's locations to host only. 
*/ - private def assertLocations(taskSet: TaskSet, locations: Seq[Seq[String]]) { - assert(locations.size === taskSet.tasks.size) - for ((expectLocs, taskLocs) <- - taskSet.tasks.map(_.preferredLocations).zip(locations)) { - assert(expectLocs.map(loc => spark.Utils.parseHostPort(loc)._1) === taskLocs) + /** + * Assert that the supplied TaskSet has exactly the given hosts as its preferred locations. + * Note that this checks only the host and not the executor ID. + */ + private def assertLocations(taskSet: TaskSet, hosts: Seq[Seq[String]]) { + assert(hosts.size === taskSet.tasks.size) + for ((taskLocs, expectedLocs) <- taskSet.tasks.map(_.preferredLocations).zip(hosts)) { + assert(taskLocs.map(_.host) === expectedLocs) } } From cf39d45d14e0256bbd3ffe206c14997f02429cb3 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 13 Aug 2013 12:32:40 -0700 Subject: [PATCH 066/136] More scheduling fixes: - Added periodic revival of offers in StandaloneSchedulerBackend - Replaced task scheduling aggression with multi-level delay scheduling in ClusterTaskSetManager - Fixed ZippedRDD preferred locations because they can't currently be process-local - Fixed some uses of hostPort --- core/src/main/scala/spark/SparkEnv.scala | 19 +-- core/src/main/scala/spark/Utils.scala | 31 +---- .../scala/spark/deploy/DeployMessage.scala | 1 + .../spark/deploy/worker/ExecutorRunner.scala | 6 +- .../scala/spark/deploy/worker/Worker.scala | 2 +- .../main/scala/spark/rdd/CartesianRDD.scala | 2 +- .../scala/spark/rdd/ZippedPartitionsRDD.scala | 32 ++--- core/src/main/scala/spark/rdd/ZippedRDD.scala | 23 +--- .../scheduler/cluster/ClusterScheduler.scala | 68 +---------- .../cluster/ClusterTaskSetManager.scala | 114 +++++++++++++----- .../cluster/StandaloneSchedulerBackend.scala | 7 +- 11 files changed, 116 insertions(+), 189 deletions(-) diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala index 0adbf1d96e..bca90886a3 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/spark/SparkEnv.scala @@ -54,11 +54,7 @@ class SparkEnv ( val connectionManager: ConnectionManager, val httpFileServer: HttpFileServer, val sparkFilesDir: String, - val metricsSystem: MetricsSystem, - // To be set only as part of initialization of SparkContext. - // (executorId, defaultHostPort) => executorHostPort - // If executorId is NOT found, return defaultHostPort - var executorIdToHostPort: Option[(String, String) => String]) { + val metricsSystem: MetricsSystem) { private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]() @@ -83,16 +79,6 @@ class SparkEnv ( pythonWorkers.getOrElseUpdate(key, new PythonWorkerFactory(pythonExec, envVars)).create() } } - - def resolveExecutorIdToHostPort(executorId: String, defaultHostPort: String): String = { - val env = SparkEnv.get - if (env.executorIdToHostPort.isEmpty) { - // default to using host, not host port. Relevant to non cluster modes. 
- return defaultHostPort - } - - env.executorIdToHostPort.get(executorId, defaultHostPort) - } } object SparkEnv extends Logging { @@ -236,7 +222,6 @@ object SparkEnv extends Logging { connectionManager, httpFileServer, sparkFilesDir, - metricsSystem, - None) + metricsSystem) } } diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index 885a7391d6..a05dcdcd97 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -393,41 +393,14 @@ private object Utils extends Logging { retval } -/* - // Used by DEBUG code : remove when all testing done - private val ipPattern = Pattern.compile("^[0-9]+(\\.[0-9]+)*$") def checkHost(host: String, message: String = "") { - // Currently catches only ipv4 pattern, this is just a debugging tool - not rigourous ! - // if (host.matches("^[0-9]+(\\.[0-9]+)*$")) { - if (ipPattern.matcher(host).matches()) { - Utils.logErrorWithStack("Unexpected to have host " + host + " which matches IP pattern. Message " + message) - } - if (Utils.parseHostPort(host)._2 != 0){ - Utils.logErrorWithStack("Unexpected to have host " + host + " which has port in it. Message " + message) - } + assert(host.indexOf(':') == -1, message) } - // Used by DEBUG code : remove when all testing done def checkHostPort(hostPort: String, message: String = "") { - val (host, port) = Utils.parseHostPort(hostPort) - checkHost(host) - if (port <= 0){ - Utils.logErrorWithStack("Unexpected to have port " + port + " which is not valid in " + hostPort + ". Message " + message) - } + assert(hostPort.indexOf(':') != -1, message) } - // Used by DEBUG code : remove when all testing done - def logErrorWithStack(msg: String) { - try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } } - // temp code for debug - System.exit(-1) - } -*/ - - // Once testing is complete in various modes, replace with this ? 
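Editor's note: the assertion-based checks added above reduce to a simple rule: checkHost rejects anything containing a port and checkHostPort requires one. Illustrative behaviour (host names invented, assumes code in the spark package since Utils is private):

    Utils.checkHost("hostA")            // passes: no ':' in a bare hostname
    Utils.checkHost("hostA:7077")       // throws AssertionError: port not expected
    Utils.checkHostPort("hostA:7077")   // passes: host:port form required here
    Utils.checkHostPort("hostA")        // throws AssertionError: port missing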
- def checkHost(host: String, message: String = "") {} - def checkHostPort(hostPort: String, message: String = "") {} - // Used by DEBUG code : remove when all testing done def logErrorWithStack(msg: String) { try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } } diff --git a/core/src/main/scala/spark/deploy/DeployMessage.scala b/core/src/main/scala/spark/deploy/DeployMessage.scala index 31861f3ac2..0db13ffc98 100644 --- a/core/src/main/scala/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/spark/deploy/DeployMessage.scala @@ -80,6 +80,7 @@ private[deploy] object DeployMessages { case class RegisteredApplication(appId: String) extends DeployMessage + // TODO(matei): replace hostPort with host case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) { Utils.checkHostPort(hostPort, "Required hostport") } diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala index f661accd2f..5e53d95ac2 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala @@ -40,13 +40,11 @@ private[spark] class ExecutorRunner( val memory: Int, val worker: ActorRef, val workerId: String, - val hostPort: String, + val host: String, val sparkHome: File, val workDir: File) extends Logging { - Utils.checkHostPort(hostPort, "Expected hostport") - val fullId = appId + "/" + execId var workerThread: Thread = null var process: Process = null @@ -92,7 +90,7 @@ private[spark] class ExecutorRunner( /** Replace variables such as {{EXECUTOR_ID}} and {{CORES}} in a command argument passed to us */ def substituteVariables(argument: String): String = argument match { case "{{EXECUTOR_ID}}" => execId.toString - case "{{HOSTNAME}}" => Utils.parseHostPort(hostPort)._1 + case "{{HOSTNAME}}" => host case "{{CORES}}" => cores.toString case other => other } diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/spark/deploy/worker/Worker.scala index d4b58fc34e..053ac55226 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/spark/deploy/worker/Worker.scala @@ -132,7 +132,7 @@ private[spark] class Worker( case LaunchExecutor(appId, execId, appDesc, cores_, memory_, execSparkHome_) => logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name)) val manager = new ExecutorRunner( - appId, execId, appDesc, cores_, memory_, self, workerId, host + ":" + port, new File(execSparkHome_), workDir) + appId, execId, appDesc, cores_, memory_, self, workerId, host, new File(execSparkHome_), workDir) executors(appId + "/" + execId) = manager manager.start() coresUsed += cores_ diff --git a/core/src/main/scala/spark/rdd/CartesianRDD.scala b/core/src/main/scala/spark/rdd/CartesianRDD.scala index 150e5bca29..91b3e69d6f 100644 --- a/core/src/main/scala/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/spark/rdd/CartesianRDD.scala @@ -64,7 +64,7 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest]( override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] - rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2) + (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext) = { diff --git a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala 
b/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala index 51f5cc3251..9a0831bd89 100644 --- a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala @@ -55,29 +55,15 @@ abstract class ZippedPartitionsBaseRDD[V: ClassManifest]( } override def getPreferredLocations(s: Partition): Seq[String] = { - // TODO(matei): Fix this for hostPort - - // Note that as number of rdd's increase and/or number of slaves in cluster increase, the computed preferredLocations below - // become diminishingly small : so we might need to look at alternate strategies to alleviate this. - // If there are no (or very small number of preferred locations), we will end up transferred the blocks to 'any' node in the - // cluster - paying with n/w and cache cost. - // Maybe pick a node which figures max amount of time ? - // Choose node which is hosting 'larger' of some subset of blocks ? - // Look at rack locality to ensure chosen host is atleast rack local to both hosting node ?, etc (would be good to defer this if possible) - val splits = s.asInstanceOf[ZippedPartitionsPartition].partitions - val rddSplitZip = rdds.zip(splits) - - // exact match. - val exactMatchPreferredLocations = rddSplitZip.map(x => x._1.preferredLocations(x._2)) - val exactMatchLocations = exactMatchPreferredLocations.reduce((x, y) => x.intersect(y)) - - // Remove exact match and then do host local match. - val exactMatchHosts = exactMatchLocations.map(Utils.parseHostPort(_)._1) - val matchPreferredHosts = exactMatchPreferredLocations.map(locs => locs.map(Utils.parseHostPort(_)._1)) - .reduce((x, y) => x.intersect(y)) - val otherNodeLocalLocations = matchPreferredHosts.filter { s => !exactMatchHosts.contains(s) } - - otherNodeLocalLocations ++ exactMatchLocations + val parts = s.asInstanceOf[ZippedPartitionsPartition].partitions + val prefs = rdds.zip(parts).map { case (rdd, p) => rdd.preferredLocations(p) } + // Check whether there are any hosts that match all RDDs; otherwise return the union + val exactMatchLocations = prefs.reduce((x, y) => x.intersect(y)) + if (!exactMatchLocations.isEmpty) { + exactMatchLocations + } else { + prefs.flatten.distinct + } } override def clearDependencies() { diff --git a/core/src/main/scala/spark/rdd/ZippedRDD.scala b/core/src/main/scala/spark/rdd/ZippedRDD.scala index b1c43b3195..4074e50e44 100644 --- a/core/src/main/scala/spark/rdd/ZippedRDD.scala +++ b/core/src/main/scala/spark/rdd/ZippedRDD.scala @@ -65,27 +65,16 @@ class ZippedRDD[T: ClassManifest, U: ClassManifest]( } override def getPreferredLocations(s: Partition): Seq[String] = { - // Note that as number of slaves in cluster increase, the computed preferredLocations can become small : so we might need - // to look at alternate strategies to alleviate this. (If there are no (or very small number of preferred locations), we - // will end up transferred the blocks to 'any' node in the cluster - paying with n/w and cache cost. - // Maybe pick one or the other ? (so that atleast one block is local ?). - // Choose node which is hosting 'larger' of the blocks ? - // Look at rack locality to ensure chosen host is atleast rack local to both hosting node ?, etc (would be good to defer this if possible) val (partition1, partition2) = s.asInstanceOf[ZippedPartition[T, U]].partitions val pref1 = rdd1.preferredLocations(partition1) val pref2 = rdd2.preferredLocations(partition2) - - // exact match - instance local and host local. 
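Editor's note: a worked example (host names invented) of the intersect-then-union rule introduced just below, the same rule ZippedPartitionsRDD now uses:

    val pref1 = Seq("hostA", "hostB")        // preferred locations of partition 1
    val pref2 = Seq("hostB", "hostC")        // preferred locations of partition 2
    val exact = pref1.intersect(pref2)       // Seq("hostB")
    val locs  = if (!exact.isEmpty) exact else (pref1 ++ pref2).distinct
    // locs == Seq("hostB"); with disjoint preferences it falls back to all four hosts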
+ // Check whether there are any hosts that match both RDDs; otherwise return the union val exactMatchLocations = pref1.intersect(pref2) - - // remove locations which are already handled via exactMatchLocations, and intersect where both partitions are node local. - val otherNodeLocalPref1 = pref1.filter(loc => ! exactMatchLocations.contains(loc)).map(loc => Utils.parseHostPort(loc)._1) - val otherNodeLocalPref2 = pref2.filter(loc => ! exactMatchLocations.contains(loc)).map(loc => Utils.parseHostPort(loc)._1) - val otherNodeLocalLocations = otherNodeLocalPref1.intersect(otherNodeLocalPref2) - - - // Can have mix of instance local (hostPort) and node local (host) locations as preference ! - exactMatchLocations ++ otherNodeLocalLocations + if (!exactMatchLocations.isEmpty) { + exactMatchLocations + } else { + (pref1 ++ pref2).distinct + } } override def clearDependencies() { diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala index 036e36bca0..ec76e90185 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala @@ -45,39 +45,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) // Threshold above which we warn user initial TaskSet may be starved val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong - // How often to revive offers in case there are pending tasks - that is how often to try to get - // tasks scheduled in case there are nodes available : default 0 is to disable it - to preserve existing behavior - // Note that this is required due to delay scheduling due to data locality waits, etc. - // TODO(matei): move to StandaloneSchedulerBackend? - val TASK_REVIVAL_INTERVAL = System.getProperty("spark.scheduler.revival.interval", "1000").toLong - - // TODO(matei): replace this with something that only affects levels past PROCESS_LOCAL; - // basically it can be a "cliff" for locality - /* - This property controls how aggressive we should be to modulate waiting for node local task scheduling. - To elaborate, currently there is a time limit (3 sec def) to ensure that spark attempts to wait for node locality of tasks before - scheduling on other nodes. We have modified this in yarn branch such that offers to task set happen in prioritized order : - node-local, rack-local and then others - But once all available node local (and no pref) tasks are scheduled, instead of waiting for 3 sec before - scheduling to other nodes (which degrades performance for time sensitive tasks and on larger clusters), we can - modulate that : to also allow rack local nodes or any node. The default is still set to HOST - so that previous behavior is - maintained. This is to allow tuning the tension between pulling rdd data off node and scheduling computation asap. - - TODO: rename property ? The value is one of - - NODE_LOCAL (default, no change w.r.t current behavior), - - RACK_LOCAL and - - ANY - - Note that this property makes more sense when used in conjugation with spark.tasks.revive.interval > 0 : else it is not very effective. - - Additional Note: For non trivial clusters, there is a 4x - 5x reduction in running time (in some of our experiments) based on whether - it is left at default NODE_LOCAL, RACK_LOCAL (if cluster is configured to be rack aware) or ANY. 
- If cluster is rack aware, then setting it to RACK_LOCAL gives best tradeoff and a 3x - 4x performance improvement while minimizing IO impact. - Also, it brings down the variance in running time drastically. - */ - val TASK_SCHEDULING_AGGRESSION = TaskLocality.withName( - System.getProperty("spark.tasks.schedule.aggression", "NODE_LOCAL")) - val activeTaskSets = new HashMap[String, TaskSetManager] val taskIdToTaskSetId = new HashMap[Long, String] @@ -136,14 +103,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } schedulableBuilder.buildPools() - // resolve executorId to hostPort mapping. - def executorToHostPort(executorId: String, defaultHostPort: String): String = { - executorIdToHost.getOrElse(executorId, defaultHostPort) - } - - // Unfortunately, this means that SparkEnv is indirectly referencing ClusterScheduler - // Will that be a design violation ? - SparkEnv.get.executorIdToHostPort = Some(executorToHostPort) } def newTaskId(): Long = nextTaskId.getAndIncrement() @@ -168,28 +127,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } }.start() } - - - // Change to always run with some default if TASK_REVIVAL_INTERVAL <= 0 ? - // TODO(matei): remove this thread - if (TASK_REVIVAL_INTERVAL > 0) { - new Thread("ClusterScheduler task offer revival check") { - setDaemon(true) - - override def run() { - logInfo("Starting speculative task offer revival thread") - while (true) { - try { - Thread.sleep(TASK_REVIVAL_INTERVAL) - } catch { - case e: InterruptedException => {} - } - - if (hasPendingTasks()) backend.reviveOffers() - } - } - }.start() - } } override def submitTasks(taskSet: TaskSet) { @@ -329,7 +266,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) backend.reviveOffers() } if (taskFailed) { - // Also revive offers if a task had failed for some reason other than host lost backend.reviveOffers() } @@ -384,7 +320,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } // Check for pending tasks in all our active jobs. 
- def hasPendingTasks(): Boolean = { + def hasPendingTasks: Boolean = { synchronized { rootPool.hasPendingTasks() } @@ -416,10 +352,8 @@ private[spark] class ClusterScheduler(val sc: SparkContext) /** Remove an executor from all our data structures and mark it as lost */ private def removeExecutor(executorId: String) { - // TODO(matei): remove HostPort activeExecutorIds -= executorId val host = executorIdToHost(executorId) - val execs = executorsByHost.getOrElse(host, new HashSet) execs -= executorId if (execs.isEmpty) { diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala index 1947c516db..cf406f876f 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala @@ -42,9 +42,6 @@ import spark.TaskResultTooBigFailure private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: TaskSet) extends TaskSetManager with Logging { - // Maximum time to wait to run a task in a preferred location (in ms) - val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong - // CPUs to request per task val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toDouble @@ -74,8 +71,6 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: var stageId = taskSet.stageId var name = "TaskSet_"+taskSet.stageId.toString var parent: Schedulable = null - // Last time when we launched a preferred task (for delay scheduling) - var lastPreferredLaunchTime = System.currentTimeMillis // Set of pending tasks for each executor. These collections are actually // treated as stacks, in which new tasks are added to the end of the @@ -114,11 +109,9 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: val EXCEPTION_PRINT_INTERVAL = System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong - // Map of recent exceptions (identified by string representation and - // top stack frame) to duplicate count (how many times the same - // exception has appeared) and time the full exception was - // printed. This should ideally be an LRU map that can drop old - // exceptions automatically. + // Map of recent exceptions (identified by string representation and top stack frame) to + // duplicate count (how many times the same exception has appeared) and time the full exception + // was printed. This should ideally be an LRU map that can drop old exceptions automatically. val recentExceptions = HashMap[String, (Int, Long)]() // Figure out the current map output tracker epoch and set it on all tasks @@ -134,6 +127,16 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: addPendingTask(i) } + // Figure out which locality levels we have in our TaskSet, so we can do delay scheduling + val myLocalityLevels = computeValidLocalityLevels() + val localityWaits = myLocalityLevels.map(getLocalityWait) // spark.locality.wait + + // Delay scheduling variables: we keep track of our current locality level and the time we + // last launched a task at that level, and move up a level when localityWaits[curLevel] expires. + // We then move down if we manage to launch a "more local" task. 
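Editor's note: the wait for each level comes from the system properties read by getLocalityWait, defined later in this patch. An illustrative configuration (values are examples, not defaults):

    System.setProperty("spark.locality.wait", "3000")         // fallback for every level
    System.setProperty("spark.locality.wait.process", "500")  // give up on PROCESS_LOCAL quickly
    System.setProperty("spark.locality.wait.node", "3000")
    System.setProperty("spark.locality.wait.rack", "6000")    // hold out longer for rack locality
    // With these values, an offer arriving 4 seconds after the last launch at the
    // PROCESS_LOCAL level may be used for RACK_LOCAL tasks (the 500 ms and 3000 ms
    // waits have expired), while ANY is only reached after 9.5 s with no launches.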
+ var currentLocalityIndex = 0 // Index of our current locality level in validLocalityLevels + var lastLaunchTime = System.currentTimeMillis() // Time we last launched a task at this level + /** * Add a task to all the pending-task lists that it should be on. If readding is set, we are * re-adding the task so only include it in each list if it's not already there. @@ -169,7 +172,9 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: addTo(pendingTasksWithNoPrefs) } - addTo(allPendingTasks) + if (!readding) { + allPendingTasks += index // No point scanning this whole list to find the old task there + } } /** @@ -324,18 +329,9 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: : Option[TaskDescription] = { if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { - val curTime = System.currentTimeMillis + val curTime = System.currentTimeMillis() - // If explicitly specified, use that - val locality = { - // expand only if we have waited for more than LOCALITY_WAIT for a host local task ... - // TODO(matei): Multi-level delay scheduling - if (curTime - lastPreferredLaunchTime < LOCALITY_WAIT) { - TaskLocality.NODE_LOCAL - } else { - TaskLocality.ANY - } - } + val locality = getAllowedLocalityLevel(curTime) findTask(execId, host, locality) match { case Some((index, taskLocality)) => { @@ -350,16 +346,16 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: val info = new TaskInfo(taskId, index, curTime, execId, host, taskLocality) taskInfos(taskId) = info taskAttempts(index) = info :: taskAttempts(index) - if (taskLocality == TaskLocality.PROCESS_LOCAL || taskLocality == TaskLocality.NODE_LOCAL) { - lastPreferredLaunchTime = curTime - } + // Update our locality level for delay scheduling + currentLocalityIndex = getLocalityIndex(locality) + lastLaunchTime = curTime // Serialize and return the task - val startTime = System.currentTimeMillis + val startTime = System.currentTimeMillis() // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here // we assume the task can be serialized without exceptions. val serializedTask = Task.serializeWithDependencies( task, sched.sc.addedFiles, sched.sc.addedJars, ser) - val timeTaken = System.currentTimeMillis - startTime + val timeTaken = System.currentTimeMillis() - startTime increaseRunningTasks(1) logInfo("Serialized task %s:%d as %d bytes in %d ms".format( taskSet.id, index, serializedTask.limit, timeTaken)) @@ -374,6 +370,34 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: return None } + /** + * Get the level we can launch tasks according to delay scheduling, based on current wait time. + */ + private def getAllowedLocalityLevel(curTime: Long): TaskLocality.TaskLocality = { + while (curTime - lastLaunchTime >= localityWaits(currentLocalityIndex) && + currentLocalityIndex < myLocalityLevels.length - 1) + { + // Jump to the next locality level, and remove our waiting time for the current one since + // we don't want to count it again on the next one + lastLaunchTime += localityWaits(currentLocalityIndex) + currentLocalityIndex += 1 + } + myLocalityLevels(currentLocalityIndex) + } + + /** + * Find the index in myLocalityLevels for a given locality. This is also designed to work with + * localities that are not in myLocalityLevels (in case we somehow get those) by returning the + * next-biggest level we have. Uses the fact that the last value in myLocalityLevels is ANY. 
+ */ + def getLocalityIndex(locality: TaskLocality.TaskLocality): Int = { + var index = 0 + while (locality > myLocalityLevels(index)) { + index += 1 + } + index + } + /** Called by cluster scheduler when one of our tasks changes state */ override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { SparkEnv.set(env) @@ -467,7 +491,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: case ef: ExceptionFailure => sched.listener.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null)) val key = ef.description - val now = System.currentTimeMillis + val now = System.currentTimeMillis() val (printFull, dupCount) = { if (recentExceptions.contains(key)) { val (dupCount, printTime) = recentExceptions(key) @@ -631,4 +655,38 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: override def hasPendingTasks(): Boolean = { numTasks > 0 && tasksFinished < numTasks } + + private def getLocalityWait(level: TaskLocality.TaskLocality): Long = { + val defaultWait = System.getProperty("spark.locality.wait", "3000") + level match { + case TaskLocality.PROCESS_LOCAL => + System.getProperty("spark.locality.wait.process", defaultWait).toLong + case TaskLocality.NODE_LOCAL => + System.getProperty("spark.locality.wait.node", defaultWait).toLong + case TaskLocality.RACK_LOCAL => + System.getProperty("spark.locality.wait.rack", defaultWait).toLong + case TaskLocality.ANY => + 0L + } + } + + /** + * Compute the locality levels used in this TaskSet. Assumes that all tasks have already been + * added to queues using addPendingTask. + */ + private def computeValidLocalityLevels(): Array[TaskLocality.TaskLocality] = { + import TaskLocality.{PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY} + val levels = new ArrayBuffer[TaskLocality.TaskLocality] + if (!pendingTasksForExecutor.isEmpty && getLocalityWait(PROCESS_LOCAL) != 0) { + levels += PROCESS_LOCAL + } + if (!pendingTasksForHost.isEmpty && getLocalityWait(NODE_LOCAL) != 0) { + levels += NODE_LOCAL + } + if (!pendingTasksForRack.isEmpty && getLocalityWait(RACK_LOCAL) != 0) { + levels += RACK_LOCAL + } + levels += ANY + levels.toArray + } } diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 3b49af1258..3203be1029 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -26,6 +26,7 @@ import akka.dispatch.Await import akka.pattern.ask import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent} import akka.util.Duration +import akka.util.duration._ import spark.{Utils, SparkException, Logging, TaskState} import spark.scheduler.cluster.StandaloneClusterMessages._ @@ -39,8 +40,6 @@ private[spark] class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: ActorSystem) extends SchedulerBackend with Logging { - // TODO(matei): periodically revive offers as in MesosScheduler - // Use an atomic variable to track total number of cores in the cluster for simplicity and speed var totalCoreCount = new AtomicInteger(0) @@ -55,6 +54,10 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor override def preStart() { // Listen for remote client disconnection events, since they don't go through Akka's watch() context.system.eventStream.subscribe(self, 
classOf[RemoteClientLifeCycleEvent]) + + // Periodically revive offers to allow delay scheduling to work + val reviveInterval = System.getProperty("spark.scheduler.revive.interval", "1000").toLong + context.system.scheduler.schedule(0.millis, reviveInterval.millis, self, ReviveOffers) } def receive = { From 222c8971285190761354456c2fe07f5c31edf330 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 14 Aug 2013 13:56:40 -0700 Subject: [PATCH 067/136] Comment cleanup (via Kay) and some debug messages --- .../scheduler/cluster/ClusterScheduler.scala | 30 +++++++------------ .../cluster/ClusterTaskSetManager.scala | 5 ++-- .../{ => cluster}/ClusterSchedulerSuite.scala | 2 +- .../{ => local}/LocalSchedulerSuite.scala | 2 +- 4 files changed, 16 insertions(+), 23 deletions(-) rename core/src/test/scala/spark/scheduler/{ => cluster}/ClusterSchedulerSuite.scala (99%) rename core/src/test/scala/spark/scheduler/{ => local}/LocalSchedulerSuite.scala (99%) diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala index ec76e90185..028f4d3283 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala @@ -385,25 +385,17 @@ private[spark] class ClusterScheduler(val sc: SparkContext) object ClusterScheduler { - - // Used to 'spray' available containers across the available set to ensure too many containers on same host - // are not used up. Used in yarn mode and in task scheduling (when there are multiple containers available - // to execute a task) - // For example: yarn can returns more containers than we would have requested under ANY, this method - // prioritizes how to use the allocated containers. - // flatten the map such that the array buffer entries are spread out across the returned value. - // given == , , , , , i - // the return value would be something like : h1c1, h2c1, h3c1, h4c1, h5c1, h1c2, h2c2, h3c2, h1c3, h2c3, h1c4, h1c5 - // We then 'use' the containers in this order (consuming only the top K from this list where - // K = number to be user). This is to ensure that if we have multiple eligible allocations, - // they dont end up allocating all containers on a small number of hosts - increasing probability of - // multiple container failure when a host goes down. - // Note, there is bias for keys with higher number of entries in value to be picked first (by design) - // Also note that invocation of this method is expected to have containers of same 'type' - // (host-local, rack-local, off-rack) and not across types : so that reordering is simply better from - // the available list - everything else being same. - // That is, we we first consume data local, then rack local and finally off rack nodes. So the - // prioritization from this method applies to within each category + /** + * Used to balance containers across hosts. + * + * Accepts a map of hosts to resource offers for that host, and returns a prioritized list of + * resource offers representing the order in which the offers should be used. The resource + * offers are ordered such that we'll allocate one container on each host before allocating a + * second container on any host, and so on, in order to reduce the damage if a host fails. 
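A standalone sketch of that interleaving, separate from the scheduler itself: the offers o1..o6 and hosts h1..h3 are hypothetical, chosen to be consistent with the result listed in the example that follows.

    import scala.collection.mutable.{ArrayBuffer, HashMap}

    object InterleaveSketch {
      // Visit hosts with more offers first, taking one offer per host per round,
      // so no single host is drained before every host has contributed one offer.
      def interleave[K, T](map: HashMap[K, ArrayBuffer[T]]): List[T] = {
        val keys = map.keys.toList.sortWith((a, b) => map(a).size > map(b).size)
        val out = new ArrayBuffer[T]
        var round = 0
        var added = true
        while (added) {
          added = false
          for (k <- keys) {
            val offers = map(k)
            if (round < offers.size) {
              out += offers(round)
              added = true
            }
          }
          round += 1
        }
        out.toList
      }

      def main(args: Array[String]) {
        val byHost = HashMap(
          "h1" -> ArrayBuffer("o1", "o2", "o3"),
          "h2" -> ArrayBuffer("o4"),
          "h3" -> ArrayBuffer("o5", "o6"))
        println(interleave(byHost))  // List(o1, o5, o4, o2, o6, o3)
      }
    }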
+ * + * For example, given , , , returns + * [o1, o5, o4, 02, o6, o3] + */ def prioritizeContainers[K, T] (map: HashMap[K, ArrayBuffer[T]]): List[T] = { val _keyList = new ArrayBuffer[K](map.size) _keyList ++= map.keys diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala index cf406f876f..5316a7aed1 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala @@ -116,7 +116,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: // Figure out the current map output tracker epoch and set it on all tasks val epoch = sched.mapOutputTracker.getEpoch - logDebug("Epoch for " + taskSet.id + ": " + epoch) + logDebug("Epoch for " + taskSet + ": " + epoch) for (t <- tasks) { t.epoch = epoch } @@ -129,7 +129,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: // Figure out which locality levels we have in our TaskSet, so we can do delay scheduling val myLocalityLevels = computeValidLocalityLevels() - val localityWaits = myLocalityLevels.map(getLocalityWait) // spark.locality.wait + val localityWaits = myLocalityLevels.map(getLocalityWait) // Time to wait at each level // Delay scheduling variables: we keep track of our current locality level and the time we // last launched a task at that level, and move up a level when localityWaits[curLevel] expires. @@ -687,6 +687,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: levels += RACK_LOCAL } levels += ANY + logDebug("Valid locality levels for " + taskSet + ": " + levels.mkString(", ")) levels.toArray } } diff --git a/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala similarity index 99% rename from core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala rename to core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala index 6327155157..8618009ea6 100644 --- a/core/src/test/scala/spark/scheduler/ClusterSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package spark.scheduler.cluster import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter diff --git a/core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/local/LocalSchedulerSuite.scala similarity index 99% rename from core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala rename to core/src/test/scala/spark/scheduler/local/LocalSchedulerSuite.scala index a79b8bf256..d28ee47fa3 100644 --- a/core/src/test/scala/spark/scheduler/LocalSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/local/LocalSchedulerSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.scheduler +package spark.scheduler.local import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter From 2a4ed10210f9ee32f472e2465094d88561c0ff18 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 15 Aug 2013 17:22:49 -0700 Subject: [PATCH 068/136] Address some review comments: - When a resourceOffers() call has multiple offers, force the TaskSets to consider them in increasing order of locality levels so that they get a chance to launch stuff locally across all offers - Simplify ClusterScheduler.prioritizeContainers - Add docs on the new configuration options --- .../scheduler/cluster/ClusterScheduler.scala | 21 ++++++------- .../cluster/ClusterTaskSetManager.scala | 17 +++++++---- .../scheduler/cluster/TaskSetManager.scala | 6 +++- .../scheduler/local/LocalScheduler.scala | 3 +- .../scheduler/local/LocalTaskSetManager.scala | 6 +++- .../cluster/ClusterSchedulerSuite.scala | 8 +++-- docs/configuration.md | 30 +++++++++++++++++-- 7 files changed, 68 insertions(+), 23 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala index 028f4d3283..e88edc5b2a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala @@ -184,27 +184,29 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } - // Build a list of tasks to assign to each slave + // Build a list of tasks to assign to each worker val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) val availableCpus = offers.map(o => o.cores).toArray - val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue() - for (manager <- sortedTaskSetQueue) { + val sortedTaskSets = rootPool.getSortedTaskSetQueue() + for (taskSet <- sortedTaskSets) { logDebug("parentName: %s, name: %s, runningTasks: %s".format( - manager.parent.name, manager.name, manager.runningTasks)) + taskSet.parent.name, taskSet.name, taskSet.runningTasks)) } + // Take each TaskSet in our scheduling order, and then offer it each node in increasing order + // of locality levels so that it gets a chance to launch local tasks on all of them. var launchedTask = false - for (manager <- sortedTaskSetQueue; offer <- offers) { + for (taskSet <- sortedTaskSets; maxLocality <- TaskLocality.values) { do { launchedTask = false for (i <- 0 until offers.size) { val execId = offers(i).executorId val host = offers(i).host - for (task <- manager.resourceOffer(execId, host, availableCpus(i))) { + for (task <- taskSet.resourceOffer(execId, host, availableCpus(i), maxLocality)) { tasks(i) += task val tid = task.taskId - taskIdToTaskSetId(tid) = manager.taskSet.id - taskSetTaskIds(manager.taskSet.id) += tid + taskIdToTaskSetId(tid) = taskSet.taskSet.id + taskSetTaskIds(taskSet.taskSet.id) += tid taskIdToExecutorId(tid) = execId activeExecutorIds += execId executorsByHost(host) += execId @@ -402,8 +404,7 @@ object ClusterScheduler { // order keyList based on population of value in map val keyList = _keyList.sortWith( - // TODO(matei): not sure why we're using getOrElse if keyList = map.keys... 
see if it matters - (left, right) => map.get(left).getOrElse(Set()).size > map.get(right).getOrElse(Set()).size + (left, right) => map(left).size > map(right).size ) val retval = new ArrayBuffer[T](keyList.size * 2) diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala index 5316a7aed1..91de25254c 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala @@ -43,7 +43,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: extends TaskSetManager with Logging { // CPUs to request per task - val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toDouble + val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toInt // Maximum times a task is allowed to fail before failing the job val MAX_TASK_FAILURES = System.getProperty("spark.task.maxFailures", "4").toInt @@ -325,15 +325,22 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: /** * Respond to an offer of a single slave from the scheduler by finding a task */ - override def resourceOffer(execId: String, host: String, availableCpus: Double) + override def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) : Option[TaskDescription] = { if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { val curTime = System.currentTimeMillis() - val locality = getAllowedLocalityLevel(curTime) + var allowedLocality = getAllowedLocalityLevel(curTime) + if (allowedLocality > maxLocality) { + allowedLocality = maxLocality // We're not allowed to search for farther-away tasks + } - findTask(execId, host, locality) match { + findTask(execId, host, allowedLocality) match { case Some((index, taskLocality)) => { // Found a task; do some bookkeeping and return a task description val task = tasks(index) @@ -347,7 +354,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: taskInfos(taskId) = info taskAttempts(index) = info :: taskAttempts(index) // Update our locality level for delay scheduling - currentLocalityIndex = getLocalityIndex(locality) + currentLocalityIndex = getLocalityIndex(allowedLocality) lastLaunchTime = curTime // Serialize and return the task val startTime = System.currentTimeMillis() diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala index 277654edc0..5ab6ab9aad 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala @@ -29,7 +29,11 @@ private[spark] trait TaskSetManager extends Schedulable { def taskSet: TaskSet - def resourceOffer(execId: String, hostPort: String, availableCpus: Double) + def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) : Option[TaskDescription] def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala index a4f5f46777..5be4dbd9f0 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala @@ -141,8 +141,7 @@ private[spark] class LocalScheduler(threads: Int, 
val maxFailures: Int, val sc: for (manager <- sortedTaskSetQueue) { do { launchTask = false - // TODO(matei): don't pass null here? - manager.resourceOffer(null, null, freeCpuCores) match { + manager.resourceOffer(null, null, freeCpuCores, null) match { case Some(task) => tasks += task taskIdToTaskSetId(task.taskId) = manager.taskSet.id diff --git a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala b/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala index 698c777bec..3ef636ff07 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala @@ -98,7 +98,11 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas return None } - override def resourceOffer(execId: String, host: String, availableCpus: Double) + override def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) : Option[TaskDescription] = { SparkEnv.set(sched.env) diff --git a/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala index 8618009ea6..aeeed14786 100644 --- a/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala @@ -72,7 +72,11 @@ class DummyTaskSetManager( override def executorLost(executorId: String, host: String): Unit = { } - override def resourceOffer(execId: String, host: String, availableCpus: Double) + override def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) : Option[TaskDescription] = { if (tasksFinished + runningTasks < numTasks) { @@ -120,7 +124,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging logInfo("parentName:%s, parent running tasks:%d, name:%s,runningTasks:%d".format(manager.parent.name, manager.parent.runningTasks, manager.name, manager.runningTasks)) } for (taskSet <- taskSetQueue) { - taskSet.resourceOffer("execId_1", "hostname_1", 1) match { + taskSet.resourceOffer("execId_1", "hostname_1", 1, TaskLocality.ANY) match { case Some(task) => return taskSet.stageId case None => {} diff --git a/docs/configuration.md b/docs/configuration.md index 99624a44aa..dff08a06f5 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -243,8 +243,34 @@ Apart from these, the following properties are also available, and may be useful 3000 Number of milliseconds to wait to launch a data-local task before giving up and launching it - in a non-data-local location. You should increase this if your tasks are long and you are seeing - poor data locality, but the default generally works well. + on a less-local node. The same wait will be used to step through multiple locality levels + (process-local, node-local, rack-local and then any). It is also possible to customize the + waiting time for each level by setting spark.locality.wait.node, etc. + You should increase this setting if your tasks are long and see poor locality, but the + default usually works well. + + + + spark.locality.wait.process + spark.locality.wait + + Customize the locality wait for process locality. This affects tasks that attempt to access + cached data in a particular executor process. + + + + spark.locality.wait.node + spark.locality.wait + + Customize the locality wait for node locality. 
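These waits are read as plain Java system properties by the scheduler code in this patch series, so a driver of this era could set them before creating its SparkContext; the sketch below only illustrates the mechanism, and the values in it are arbitrary rather than recommendations.

    object LocalityWaitDemo {
      def main(args: Array[String]) {
        // Must be set before the SparkContext (and thus the scheduler) is created.
        System.setProperty("spark.locality.wait", "5000")       // base wait, in ms, for every level
        System.setProperty("spark.locality.wait.rack", "1000")  // give up on rack locality sooner

        val sc = new spark.SparkContext("local", "locality-wait-demo")
        // ... create RDDs and run jobs as usual; cluster schedulers read these waits
        // when doing delay scheduling ...
        sc.stop()
      }
    }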
For example, you can set this to 0 to skip
+ node locality and search immediately for rack locality (if your cluster has rack information).
+
+
+
+ spark.locality.wait.rack
+ spark.locality.wait
+
+ Customize the locality wait for rack locality.

From 4004cf775d9397efbb5945768aaf05ba682c715c Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Fri, 16 Aug 2013 14:01:12 -0700
Subject: [PATCH 069/136] Added some comments on threading in scheduler code

---
 .../scala/spark/scheduler/DAGScheduler.scala  | 20 +++++++++++++++----
 .../scheduler/cluster/ClusterScheduler.scala  | 12 ++++++++++-
 .../cluster/ClusterTaskSetManager.scala       |  9 ++++++++-
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala
index 2f7e6d98f8..35b31f45a7 100644
--- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala
@@ -32,10 +32,22 @@ import spark.storage.{BlockManager, BlockManagerMaster}
 import spark.util.{MetadataCleaner, TimeStampedHashMap}

 /**
- * A Scheduler subclass that implements stage-oriented scheduling. It computes a DAG of stages for
- * each job, keeps track of which RDDs and stage outputs are materialized, and computes a minimal
- * schedule to run the job. Subclasses only need to implement the code to send a task to the cluster
- * and to report fetch failures (the submitTasks method, and code to add CompletionEvents).
+ * The high-level scheduling layer that implements stage-oriented scheduling. It computes a DAG of
+ * stages for each job, keeps track of which RDDs and stage outputs are materialized, and finds a
+ * minimal schedule to run the job. It then submits stages as TaskSets to an underlying
+ * TaskScheduler implementation that runs them on the cluster.
+ *
+ * In addition to coming up with a DAG of stages, this class also determines the preferred
+ * locations to run each task on, based on the current cache status, and passes these to the
+ * low-level TaskScheduler. Furthermore, it handles failures due to shuffle output files being
+ * lost, in which case old stages may need to be resubmitted. Failures *within* a stage that are
+ * not caused by shuffle file loss are handled by the TaskScheduler, which will retry each task
+ * a small number of times before cancelling the whole stage.
+ *
+ * THREADING: This class runs all its logic in a single thread executing the run() method, to which
+ * events are submitted using a synchronized queue (eventQueue). The public API methods, such as
+ * runJob, taskEnded and executorLost, post events asynchronously to this queue. All other methods
+ * should be private.
 */
 private[spark] class DAGScheduler(
diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
index e88edc5b2a..679d899b47 100644
--- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
+++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
@@ -33,7 +33,17 @@ import java.util.{TimerTask, Timer}

 /**
  * The main TaskScheduler implementation, for running tasks on a cluster. Clients should first call
- * start(), then submit task sets through the runTasks method.
+ * initialize() and start(), then submit task sets through the runTasks method.
+ *
+ * This class can work with multiple types of clusters by acting through a SchedulerBackend.
+ * It handles common logic, like determining a scheduling order across jobs, waking up to launch
+ * speculative tasks, etc.
+ *
+ * THREADING: SchedulerBackends and task-submitting clients can call this class from multiple
+ * threads, so it needs locks in public API methods to maintain its state. In addition, some
+ * SchedulerBackends synchronize on themselves when they want to send events here, and then
+ * acquire a lock on us, so we need to make sure that we don't try to lock the backend while
+ * we are holding a lock on ourselves.
 */
 private[spark] class ClusterScheduler(val sc: SparkContext)
   extends TaskScheduler
diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala
index 91de25254c..1d57732f5d 100644
--- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala
+++ b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala
@@ -37,7 +37,14 @@ import spark.TaskResultTooBigFailure

 /**
- * Schedules the tasks within a single TaskSet in the ClusterScheduler.
+ * Schedules the tasks within a single TaskSet in the ClusterScheduler. This class keeps track of
+ * the status of each task, retries tasks if they fail (up to a limited number of times), and
+ * handles locality-aware scheduling for this TaskSet via delay scheduling. The main interfaces
+ * to it are resourceOffer, which asks the TaskSet whether it wants to run a task on one node,
+ * and statusUpdate, which tells it that one of its tasks changed state (e.g. finished).
+ *
+ * THREADING: This class is designed to only be called from code with a lock on the
+ * ClusterScheduler (e.g. its event handlers). It should not be called from other threads.
 */
 private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: TaskSet)
   extends TaskSetManager with Logging {

From 8ac3d1e2636ec71ab9a14bed68f138e3a365603e Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Sun, 18 Aug 2013 19:36:34 -0700
Subject: [PATCH 070/136] Added unit tests for ClusterTaskSetManager, and fix a
 bug found with resetting locality level after a non-local launch

---
 .../scala/spark/scheduler/TaskLocation.scala  |   4 +-
 .../scala/spark/scheduler/TaskResult.scala    |   4 +-
 .../cluster/ClusterTaskSetManager.scala       |  27 +-
 .../scheduler/cluster/TaskDescription.scala   |   3 +
 .../scheduler/cluster/TaskSetManager.scala    |   9 +
 .../scheduler/local/LocalTaskSetManager.scala |   2 +-
 core/src/main/scala/spark/util/Clock.scala    |  29 ++
 .../cluster/ClusterSchedulerSuite.scala       |  21 +-
 .../cluster/ClusterTaskSetManagerSuite.scala  | 273 ++++++++++++++++++
 .../spark/scheduler/cluster/FakeTask.scala    |  26 ++
 .../src/test/scala/spark/util/FakeClock.scala |  26 ++
 11 files changed, 396 insertions(+), 28 deletions(-)
 create mode 100644 core/src/main/scala/spark/util/Clock.scala
 create mode 100644 core/src/test/scala/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala
 create mode 100644 core/src/test/scala/spark/scheduler/cluster/FakeTask.scala
 create mode 100644 core/src/test/scala/spark/util/FakeClock.scala

diff --git a/core/src/main/scala/spark/scheduler/TaskLocation.scala b/core/src/main/scala/spark/scheduler/TaskLocation.scala
index 0e97c61188..fea117e956 100644
--- a/core/src/main/scala/spark/scheduler/TaskLocation.scala
+++ b/core/src/main/scala/spark/scheduler/TaskLocation.scala
@@ -23,7 +23,9 @@ package spark.scheduler
 * of preference will be executors on the same host if this is not possible.
*/ private[spark] -class TaskLocation private (val host: String, val executorId: Option[String]) extends Serializable +class TaskLocation private (val host: String, val executorId: Option[String]) extends Serializable { + override def toString: String = "TaskLocation(" + host + ", " + executorId + ")" +} private[spark] object TaskLocation { def apply(host: String, executorId: String) = new TaskLocation(host, Some(executorId)) diff --git a/core/src/main/scala/spark/scheduler/TaskResult.scala b/core/src/main/scala/spark/scheduler/TaskResult.scala index 89793e0e82..fc4856756b 100644 --- a/core/src/main/scala/spark/scheduler/TaskResult.scala +++ b/core/src/main/scala/spark/scheduler/TaskResult.scala @@ -28,7 +28,9 @@ import java.nio.ByteBuffer // TODO: Use of distributed cache to return result is a hack to get around // what seems to be a bug with messages over 60KB in libprocess; fix it private[spark] -class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) extends Externalizable { +class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) + extends Externalizable +{ def this() = this(null.asInstanceOf[T], null, null) override def writeExternal(out: ObjectOutput) { diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala index 1d57732f5d..a4d6880abb 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala @@ -34,6 +34,7 @@ import scala.Some import spark.FetchFailed import spark.ExceptionFailure import spark.TaskResultTooBigFailure +import spark.util.{SystemClock, Clock} /** @@ -46,9 +47,13 @@ import spark.TaskResultTooBigFailure * THREADING: This class is designed to only be called from code with a lock on the * ClusterScheduler (e.g. its event handlers). It should not be called from other threads. */ -private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: TaskSet) - extends TaskSetManager with Logging { - +private[spark] class ClusterTaskSetManager( + sched: ClusterScheduler, + val taskSet: TaskSet, + clock: Clock = SystemClock) + extends TaskSetManager + with Logging +{ // CPUs to request per task val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toInt @@ -142,7 +147,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: // last launched a task at that level, and move up a level when localityWaits[curLevel] expires. // We then move down if we manage to launch a "more local" task. var currentLocalityIndex = 0 // Index of our current locality level in validLocalityLevels - var lastLaunchTime = System.currentTimeMillis() // Time we last launched a task at this level + var lastLaunchTime = clock.getTime() // Time we last launched a task at this level /** * Add a task to all the pending-task lists that it should be on. 
If readding is set, we are @@ -340,7 +345,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: : Option[TaskDescription] = { if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { - val curTime = System.currentTimeMillis() + val curTime = clock.getTime() var allowedLocality = getAllowedLocalityLevel(curTime) if (allowedLocality > maxLocality) { @@ -361,22 +366,22 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: taskInfos(taskId) = info taskAttempts(index) = info :: taskAttempts(index) // Update our locality level for delay scheduling - currentLocalityIndex = getLocalityIndex(allowedLocality) + currentLocalityIndex = getLocalityIndex(taskLocality) lastLaunchTime = curTime // Serialize and return the task - val startTime = System.currentTimeMillis() + val startTime = clock.getTime() // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here // we assume the task can be serialized without exceptions. val serializedTask = Task.serializeWithDependencies( task, sched.sc.addedFiles, sched.sc.addedJars, ser) - val timeTaken = System.currentTimeMillis() - startTime + val timeTaken = clock.getTime() - startTime increaseRunningTasks(1) logInfo("Serialized task %s:%d as %d bytes in %d ms".format( taskSet.id, index, serializedTask.limit, timeTaken)) val taskName = "task %s:%d".format(taskSet.id, index) if (taskAttempts(index).size == 1) taskStarted(task,info) - return Some(new TaskDescription(taskId, execId, taskName, serializedTask)) + return Some(new TaskDescription(taskId, execId, taskName, index, serializedTask)) } case _ => } @@ -505,7 +510,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: case ef: ExceptionFailure => sched.listener.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null)) val key = ef.description - val now = System.currentTimeMillis() + val now = clock.getTime() val (printFull, dupCount) = { if (recentExceptions.contains(key)) { val (dupCount, printTime) = recentExceptions(key) @@ -643,7 +648,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet: val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation) if (tasksFinished >= minFinishedForSpeculation) { - val time = System.currentTimeMillis() + val time = clock.getTime() val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray Arrays.sort(durations) val medianDuration = durations(min((0.5 * numTasks).round.toInt, durations.size - 1)) diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala b/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala index 761fdf6919..187553233f 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala @@ -24,6 +24,7 @@ private[spark] class TaskDescription( val taskId: Long, val executorId: String, val name: String, + val index: Int, // Index within this task's TaskSet _serializedTask: ByteBuffer) extends Serializable { @@ -31,4 +32,6 @@ private[spark] class TaskDescription( private val buffer = new SerializableBuffer(_serializedTask) def serializedTask: ByteBuffer = buffer.value + + override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index) } diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala 
b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala index 5ab6ab9aad..0248830b7a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala @@ -22,6 +22,15 @@ import java.nio.ByteBuffer import spark.TaskState.TaskState import spark.scheduler.TaskSet +/** + * Tracks and schedules the tasks within a single TaskSet. This class keeps track of the status of + * each task and is responsible for retries on failure and locality. The main interfaces to it + * are resourceOffer, which asks the TaskSet whether it wants to run a task on one node, and + * statusUpdate, which tells it that one of its tasks changed state (e.g. finished). + * + * THREADING: This class is designed to only be called from code with a lock on the TaskScheduler + * (e.g. its event handlers). It should not be called from other threads. + */ private[spark] trait TaskSetManager extends Schedulable { def schedulableQueue = null diff --git a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala b/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala index 3ef636ff07..e237f289e3 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala +++ b/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala @@ -125,7 +125,7 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas copiesRunning(index) += 1 increaseRunningTasks(1) taskStarted(task, info) - return Some(new TaskDescription(taskId, null, taskName, bytes)) + return Some(new TaskDescription(taskId, null, taskName, index, bytes)) case None => {} } } diff --git a/core/src/main/scala/spark/util/Clock.scala b/core/src/main/scala/spark/util/Clock.scala new file mode 100644 index 0000000000..aa71a5b442 --- /dev/null +++ b/core/src/main/scala/spark/util/Clock.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.util + +/** + * An interface to represent clocks, so that they can be mocked out in unit tests. 
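A throwaway illustration of why that is useful: Debouncer and ManualClock below are invented for this sketch (ManualClock plays the role of the FakeClock test helper added later in this patch), but the wiring, a SystemClock-style default in production and a hand-advanced clock in tests, mirrors how ClusterTaskSetManager now takes its clock.

    // Self-contained sketch; this Clock mirrors the (package-private) trait added here.
    trait Clock { def getTime(): Long }
    object SystemClock extends Clock { def getTime(): Long = System.currentTimeMillis() }
    class ManualClock extends Clock {
      private var t = 0L
      def advance(ms: Long) { t += ms }
      def getTime(): Long = t
    }

    // Hypothetical component: suppresses repeated events within a time window.
    class Debouncer(windowMs: Long, clock: Clock = SystemClock) {
      private var lastFired = -windowMs  // so the very first call always fires
      def shouldFire(): Boolean = {
        val now = clock.getTime()
        if (now - lastFired >= windowMs) { lastFired = now; true } else false
      }
    }

    object DebouncerDemo {
      def main(args: Array[String]) {
        val clock = new ManualClock
        val d = new Debouncer(1000, clock)
        println(d.shouldFire())  // true: first call fires
        println(d.shouldFire())  // false: no simulated time has passed
        clock.advance(1000)
        println(d.shouldFire())  // true: the window has elapsed
      }
    }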
+ */ +private[spark] trait Clock { + def getTime(): Long +} + +private[spark] object SystemClock extends Clock { + def getTime(): Long = System.currentTimeMillis() +} diff --git a/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala b/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala index aeeed14786..abfdabf5fe 100644 --- a/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala +++ b/core/src/test/scala/spark/scheduler/cluster/ClusterSchedulerSuite.scala @@ -27,7 +27,7 @@ import scala.collection.mutable.ArrayBuffer import java.util.Properties -class DummyTaskSetManager( +class FakeTaskSetManager( initPriority: Int, initStageId: Int, initNumTasks: Int, @@ -81,7 +81,7 @@ class DummyTaskSetManager( { if (tasksFinished + runningTasks < numTasks) { increaseRunningTasks(1) - return Some(new TaskDescription(0, execId, "task 0:0", null)) + return Some(new TaskDescription(0, execId, "task 0:0", 0, null)) } return None } @@ -104,17 +104,10 @@ class DummyTaskSetManager( } } -class DummyTask(stageId: Int) extends Task[Int](stageId) -{ - def run(attemptId: Long): Int = { - return 0 - } -} - class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging { - def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler, taskSet: TaskSet): DummyTaskSetManager = { - new DummyTaskSetManager(priority, stage, numTasks, cs , taskSet) + def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler, taskSet: TaskSet): FakeTaskSetManager = { + new FakeTaskSetManager(priority, stage, numTasks, cs , taskSet) } def resourceOffer(rootPool: Pool): Int = { @@ -141,7 +134,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging sc = new SparkContext("local", "ClusterSchedulerSuite") val clusterScheduler = new ClusterScheduler(sc) var tasks = ArrayBuffer[Task[_]]() - val task = new DummyTask(0) + val task = new FakeTask(0) tasks += task val taskSet = new TaskSet(tasks.toArray,0,0,0,null) @@ -168,7 +161,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging sc = new SparkContext("local", "ClusterSchedulerSuite") val clusterScheduler = new ClusterScheduler(sc) var tasks = ArrayBuffer[Task[_]]() - val task = new DummyTask(0) + val task = new FakeTask(0) tasks += task val taskSet = new TaskSet(tasks.toArray,0,0,0,null) @@ -225,7 +218,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging sc = new SparkContext("local", "ClusterSchedulerSuite") val clusterScheduler = new ClusterScheduler(sc) var tasks = ArrayBuffer[Task[_]]() - val task = new DummyTask(0) + val task = new FakeTask(0) tasks += task val taskSet = new TaskSet(tasks.toArray,0,0,0,null) diff --git a/core/src/test/scala/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala b/core/src/test/scala/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala new file mode 100644 index 0000000000..5a0b949ef5 --- /dev/null +++ b/core/src/test/scala/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.scheduler.cluster + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable + +import org.scalatest.FunSuite + +import spark._ +import spark.scheduler._ +import spark.executor.TaskMetrics +import java.nio.ByteBuffer +import spark.util.FakeClock + +/** + * A mock ClusterScheduler implementation that just remembers information about tasks started and + * feedback received from the TaskSetManagers. Note that it's important to initialize this with + * a list of "live" executors and their hostnames for isExecutorAlive and hasExecutorsAliveOnHost + * to work, and these are required for locality in ClusterTaskSetManager. + */ +class FakeClusterScheduler(sc: SparkContext, liveExecutors: (String, String)* /* execId, host */) + extends ClusterScheduler(sc) +{ + val startedTasks = new ArrayBuffer[Long] + val endedTasks = new mutable.HashMap[Long, TaskEndReason] + val finishedManagers = new ArrayBuffer[TaskSetManager] + + val executors = new mutable.HashMap[String, String] ++ liveExecutors + + listener = new TaskSchedulerListener { + def taskStarted(task: Task[_], taskInfo: TaskInfo) { + startedTasks += taskInfo.index + } + + def taskEnded( + task: Task[_], + reason: TaskEndReason, + result: Any, + accumUpdates: mutable.Map[Long, Any], + taskInfo: TaskInfo, + taskMetrics: TaskMetrics) + { + endedTasks(taskInfo.index) = reason + } + + def executorGained(execId: String, host: String) {} + + def executorLost(execId: String) {} + + def taskSetFailed(taskSet: TaskSet, reason: String) {} + } + + def removeExecutor(execId: String): Unit = executors -= execId + + override def taskSetFinished(manager: TaskSetManager): Unit = finishedManagers += manager + + override def isExecutorAlive(execId: String): Boolean = executors.contains(execId) + + override def hasExecutorsAliveOnHost(host: String): Boolean = executors.values.exists(_ == host) +} + +class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging { + import TaskLocality.{ANY, PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL} + + val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong + + test("TaskSet with no preferences") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, ("exec1", "host1")) + val taskSet = createTaskSet(1) + val manager = new ClusterTaskSetManager(sched, taskSet) + + // Offer a host with no CPUs + assert(manager.resourceOffer("exec1", "host1", 0, ANY) === None) + + // Offer a host with process-local as the constraint; this should work because the TaskSet + // above won't have any locality preferences + val taskOption = manager.resourceOffer("exec1", "host1", 2, TaskLocality.PROCESS_LOCAL) + assert(taskOption.isDefined) + val task = taskOption.get + assert(task.executorId === "exec1") + assert(sched.startedTasks.contains(0)) + + // Re-offer the host -- now we should get no more tasks + assert(manager.resourceOffer("exec1", "host1", 2, PROCESS_LOCAL) === None) + + // Tell it the task has finished + manager.statusUpdate(0, TaskState.FINISHED, createTaskResult(0)) + assert(sched.endedTasks(0) === Success) + 
assert(sched.finishedManagers.contains(manager)) + } + + test("multiple offers with no preferences") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, ("exec1", "host1")) + val taskSet = createTaskSet(3) + val manager = new ClusterTaskSetManager(sched, taskSet) + + // First three offers should all find tasks + for (i <- 0 until 3) { + val taskOption = manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) + assert(taskOption.isDefined) + val task = taskOption.get + assert(task.executorId === "exec1") + } + assert(sched.startedTasks.toSet === Set(0, 1, 2)) + + // Re-offer the host -- now we should get no more tasks + assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None) + + // Finish the first two tasks + manager.statusUpdate(0, TaskState.FINISHED, createTaskResult(0)) + manager.statusUpdate(1, TaskState.FINISHED, createTaskResult(1)) + assert(sched.endedTasks(0) === Success) + assert(sched.endedTasks(1) === Success) + assert(!sched.finishedManagers.contains(manager)) + + // Finish the last task + manager.statusUpdate(2, TaskState.FINISHED, createTaskResult(2)) + assert(sched.endedTasks(2) === Success) + assert(sched.finishedManagers.contains(manager)) + } + + test("basic delay scheduling") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) + val taskSet = createTaskSet(4, + Seq(TaskLocation("host1", "exec1")), + Seq(TaskLocation("host2", "exec2")), + Seq(TaskLocation("host1"), TaskLocation("host2", "exec2")), + Seq() // Last task has no locality prefs + ) + val clock = new FakeClock + val manager = new ClusterTaskSetManager(sched, taskSet, clock) + + // First offer host1, exec1: first task should be chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) + + // Offer host1, exec1 again: the last task, which has no prefs, should be chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 3) + + // Offer host1, exec1 again, at PROCESS_LOCAL level: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None) + + clock.advance(LOCALITY_WAIT) + + // Offer host1, exec1 again, at PROCESS_LOCAL level: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, PROCESS_LOCAL) === None) + + // Offer host1, exec1 again, at NODE_LOCAL level: we should choose task 2 + assert(manager.resourceOffer("exec1", "host1", 1, NODE_LOCAL).get.index == 2) + + // Offer host1, exec1 again, at NODE_LOCAL level: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, NODE_LOCAL) === None) + + // Offer host1, exec1 again, at ANY level: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + + clock.advance(LOCALITY_WAIT) + + // Offer host1, exec1 again, at ANY level: task 1 should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1) + + // Offer host1, exec1 again, at ANY level: nothing should be chosen as we've launched all tasks + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + } + + test("delay scheduling with fallback") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, + ("exec1", "host1"), ("exec2", "host2"), ("exec3", "host3")) + val taskSet = createTaskSet(5, + Seq(TaskLocation("host1")), + Seq(TaskLocation("host2")), + Seq(TaskLocation("host2")), + Seq(TaskLocation("host3")), + Seq(TaskLocation("host2")) + ) + val clock = new 
FakeClock + val manager = new ClusterTaskSetManager(sched, taskSet, clock) + + // First offer host1: first task should be chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) + + // Offer host1 again: nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + + clock.advance(LOCALITY_WAIT) + + // Offer host1 again: second task (on host2) should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1) + + // Offer host1 again: third task (on host2) should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 2) + + // Offer host2: fifth task (also on host2) should get chosen + assert(manager.resourceOffer("exec2", "host2", 1, ANY).get.index === 4) + + // Now that we've launched a local task, we should no longer launch the task for host3 + assert(manager.resourceOffer("exec2", "host2", 1, ANY) === None) + + clock.advance(LOCALITY_WAIT) + + // After another delay, we can go ahead and launch that task non-locally + assert(manager.resourceOffer("exec2", "host2", 1, ANY).get.index === 3) + } + + test("delay scheduling with failed hosts") { + sc = new SparkContext("local", "test") + val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) + val taskSet = createTaskSet(3, + Seq(TaskLocation("host1")), + Seq(TaskLocation("host2")), + Seq(TaskLocation("host3")) + ) + val clock = new FakeClock + val manager = new ClusterTaskSetManager(sched, taskSet, clock) + + // First offer host1: first task should be chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) + + // Offer host1 again: third task should be chosen immediately because host3 is not up + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 2) + + // After this, nothing should get chosen + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + + // Now mark host2 as dead + sched.removeExecutor("exec2") + manager.executorLost("exec2", "host2") + + // Task 1 should immediately be launched on host1 because its original host is gone + assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 1) + + // Now that all tasks have launched, nothing new should be launched anywhere else + assert(manager.resourceOffer("exec1", "host1", 1, ANY) === None) + assert(manager.resourceOffer("exec2", "host2", 1, ANY) === None) + } + + /** + * Utility method to create a TaskSet, potentially setting a particular sequence of preferred + * locations for each task (given as varargs) if this sequence is not empty. + */ + def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { + if (prefLocs.size != 0 && prefLocs.size != numTasks) { + throw new IllegalArgumentException("Wrong number of task locations") + } + val tasks = Array.tabulate[Task[_]](numTasks) { i => + new FakeTask(i, if (prefLocs.size != 0) prefLocs(i) else Nil) + } + new TaskSet(tasks, 0, 0, 0, null) + } + + def createTaskResult(id: Int): ByteBuffer = { + ByteBuffer.wrap(Utils.serialize(new TaskResult[Int](id, mutable.Map.empty, new TaskMetrics))) + } +} diff --git a/core/src/test/scala/spark/scheduler/cluster/FakeTask.scala b/core/src/test/scala/spark/scheduler/cluster/FakeTask.scala new file mode 100644 index 0000000000..de9e66be20 --- /dev/null +++ b/core/src/test/scala/spark/scheduler/cluster/FakeTask.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.scheduler.cluster + +import spark.scheduler.{TaskLocation, Task} + +class FakeTask(stageId: Int, prefLocs: Seq[TaskLocation] = Nil) extends Task[Int](stageId) { + override def run(attemptId: Long): Int = 0 + + override def preferredLocations: Seq[TaskLocation] = prefLocs +} diff --git a/core/src/test/scala/spark/util/FakeClock.scala b/core/src/test/scala/spark/util/FakeClock.scala new file mode 100644 index 0000000000..236706317e --- /dev/null +++ b/core/src/test/scala/spark/util/FakeClock.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.util + +class FakeClock extends Clock { + private var time = 0L + + def advance(millis: Long): Unit = time += millis + + def getTime(): Long = time +} From 82bf4c0339808f51c9cdffa6a0a829cb5981d92d Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 18 Aug 2013 20:25:45 -0700 Subject: [PATCH 071/136] Allow subclasses of Product2 in all key-value related classes (ShuffleDependency, PairRDDFunctions, etc). --- core/src/main/scala/spark/Aggregator.scala | 8 +- core/src/main/scala/spark/Dependency.scala | 4 +- .../main/scala/spark/PairRDDFunctions.scala | 104 +++++------------- core/src/main/scala/spark/Partitioner.scala | 2 +- core/src/main/scala/spark/RDD.scala | 5 +- core/src/main/scala/spark/SparkContext.scala | 9 +- .../scala/spark/api/java/JavaPairRDD.scala | 13 ++- .../main/scala/spark/rdd/CoGroupedRDD.scala | 6 +- .../main/scala/spark/rdd/ShuffledRDD.scala | 6 +- .../main/scala/spark/rdd/SubtractedRDD.scala | 6 +- .../tools/JavaAPICompletenessChecker.scala | 6 +- 11 files changed, 60 insertions(+), 109 deletions(-) diff --git a/core/src/main/scala/spark/Aggregator.scala b/core/src/main/scala/spark/Aggregator.scala index 136b4da61e..3920f8511c 100644 --- a/core/src/main/scala/spark/Aggregator.scala +++ b/core/src/main/scala/spark/Aggregator.scala @@ -28,11 +28,11 @@ import scala.collection.JavaConversions._ * @param mergeCombiners function to merge outputs from multiple mergeValue function. 
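A standalone example of how the three functions cooperate, computing a per-key (sum, count) pair from which an average can be derived; it assumes only the case-class constructor and the combineValuesByKey signature visible in this hunk.

    import spark.Aggregator

    object AverageByKeySketch {
      def main(args: Array[String]) {
        // V = Int (a single value), C = (Int, Int) (running sum and count).
        val agg = new Aggregator[String, Int, (Int, Int)](
          (v: Int) => (v, 1),                                // createCombiner: first value for a key
          (c: (Int, Int), v: Int) => (c._1 + v, c._2 + 1),   // mergeValue: fold in another value
          (c1: (Int, Int), c2: (Int, Int)) =>                // mergeCombiners: merge partial results
            (c1._1 + c2._1, c1._2 + c2._2))

        val records = Iterator("a" -> 1, "a" -> 3, "b" -> 10)
        val combined = agg.combineValuesByKey(records).toMap
        println(combined("a"))  // (4, 2)  -> average 2.0
        println(combined("b"))  // (10, 1) -> average 10.0
      }
    }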
*/ case class Aggregator[K, V, C] ( - val createCombiner: V => C, - val mergeValue: (C, V) => C, - val mergeCombiners: (C, C) => C) { + createCombiner: V => C, + mergeValue: (C, V) => C, + mergeCombiners: (C, C) => C) { - def combineValuesByKey(iter: Iterator[(K, V)]) : Iterator[(K, C)] = { + def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]) : Iterator[(K, C)] = { val combiners = new JHashMap[K, C] for ((k, v) <- iter) { val oldC = combiners.get(k) diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/spark/Dependency.scala index b1edaa06f8..d5a9606570 100644 --- a/core/src/main/scala/spark/Dependency.scala +++ b/core/src/main/scala/spark/Dependency.scala @@ -44,10 +44,10 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) { * @param serializerClass class name of the serializer to use */ class ShuffleDependency[K, V]( - @transient rdd: RDD[(K, V)], + @transient rdd: RDD[_ <: Product2[K, V]], val partitioner: Partitioner, val serializerClass: String = null) - extends Dependency(rdd) { + extends Dependency(rdd.asInstanceOf[RDD[Product2[K, V]]]) { val shuffleId: Int = rdd.context.newShuffleId() } diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 0be4b4feb8..3ae703ce1a 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -21,9 +21,8 @@ import java.nio.ByteBuffer import java.util.{Date, HashMap => JHashMap} import java.text.SimpleDateFormat -import scala.collection.Map +import scala.collection.{mutable, Map} import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap import scala.collection.JavaConversions._ import org.apache.hadoop.conf.Configuration @@ -38,7 +37,6 @@ import org.apache.hadoop.mapred.OutputFormat import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat} import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat, RecordWriter => NewRecordWriter, Job => NewAPIHadoopJob, HadoopMapReduceUtil} -import org.apache.hadoop.security.UserGroupInformation import spark.partial.BoundedDouble import spark.partial.PartialResult @@ -50,8 +48,7 @@ import spark.Partitioner._ * Extra functions available on RDDs of (key, value) pairs through an implicit conversion. * Import `spark.SparkContext._` at the top of your program to use these functions. 
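A minimal usage sketch under that convention; it assumes the implicit conversion for ordinary tuple RDDs remains in scope after this change, which only widens what the PairRDDFunctions constructor itself accepts.

    import spark.SparkContext
    import spark.SparkContext._  // brings the RDD-to-PairRDDFunctions conversion into scope

    object PairOpsSketch {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "pair-ops-demo")
        val counts = sc.parallelize(Seq("a", "b", "a", "c", "a"))
          .map(w => (w, 1))
          .reduceByKey(_ + _)         // a PairRDDFunctions method made available by the import
        println(counts.collectAsMap())  // e.g. Map(a -> 3, b -> 1, c -> 1)
        sc.stop()
      }
    }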
*/ -class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( - self: RDD[(K, V)]) +class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Product2[K, V]]) extends Logging with HadoopMapReduceUtil with Serializable { @@ -85,18 +82,17 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( } val aggregator = new Aggregator[K, V, C](createCombiner, mergeValue, mergeCombiners) if (self.partitioner == Some(partitioner)) { - self.mapPartitions(aggregator.combineValuesByKey, true) + self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) } else if (mapSideCombine) { - val mapSideCombined = self.mapPartitions(aggregator.combineValuesByKey, true) - val partitioned = new ShuffledRDD[K, C](mapSideCombined, partitioner) - .setSerializer(serializerClass) - partitioned.mapPartitions(aggregator.combineCombinersByKey, true) + val combined = self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) + val partitioned = new ShuffledRDD[K, C](combined, partitioner).setSerializer(serializerClass) + partitioned.mapPartitions(aggregator.combineCombinersByKey, preservesPartitioning = true) } else { // Don't apply map-side combiner. // A sanity check to make sure mergeCombiners is not defined. assert(mergeCombiners == null) val values = new ShuffledRDD[K, V](self, partitioner).setSerializer(serializerClass) - values.mapPartitions(aggregator.combineValuesByKey, true) + values.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) } } @@ -166,7 +162,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( throw new SparkException("reduceByKeyLocally() does not support array keys") } - def reducePartition(iter: Iterator[(K, V)]): Iterator[JHashMap[K, V]] = { + def reducePartition(iter: Iterator[Product2[K, V]]): Iterator[JHashMap[K, V]] = { val map = new JHashMap[K, V] for ((k, v) <- iter) { val old = map.get(k) @@ -180,7 +176,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( val old = m1.get(k) m1.put(k, if (old == null) v else func(old, v)) } - return m1 + m1 } self.mapPartitions(reducePartition).reduce(mergeMaps) @@ -378,7 +374,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( /** * Return the key-value pairs in this RDD to the master as a Map. 
*/ - def collectAsMap(): Map[K, V] = HashMap(self.collect(): _*) + def collectAsMap(): Map[K, V] = { + val data = self.toArray() + val map = new mutable.HashMap[K, V] + map.sizeHint(data.length) + data.foreach { case(k, v) => map.put(k, v) } + map + } /** * Pass each value in the key-value pair RDD through a map function without changing the keys; @@ -406,13 +408,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } - val cg = new CoGroupedRDD[K]( - Seq(self.asInstanceOf[RDD[(K, _)]], other.asInstanceOf[RDD[(K, _)]]), - partitioner) + val cg = new CoGroupedRDD[K](Seq(self, other), partitioner) val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest) - prfs.mapValues { - case Seq(vs, ws) => - (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]]) + prfs.mapValues { case Seq(vs, ws) => + (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]]) } } @@ -425,15 +424,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } - val cg = new CoGroupedRDD[K]( - Seq(self.asInstanceOf[RDD[(K, _)]], - other1.asInstanceOf[RDD[(K, _)]], - other2.asInstanceOf[RDD[(K, _)]]), - partitioner) + val cg = new CoGroupedRDD[K](Seq(self, other1, other2), partitioner) val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest) - prfs.mapValues { - case Seq(vs, w1s, w2s) => - (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]]) + prfs.mapValues { case Seq(vs, w1s, w2s) => + (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]]) } } @@ -507,7 +501,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( self.partitioner match { case Some(p) => val index = p.getPartition(key) - def process(it: Iterator[(K, V)]): Seq[V] = { + def process(it: Iterator[Product2[K, V]]): Seq[V] = { val buf = new ArrayBuffer[V] for ((k, v) <- it if k == key) { buf += v @@ -565,7 +559,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) val stageId = self.id - def writeShard(context: spark.TaskContext, iter: Iterator[(K,V)]): Int = { + def writeShard(context: spark.TaskContext, iter: Iterator[Product2[K,V]]): Int = { // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it // around by taking a mod. We expect that no task will be attempted 2 billion times. val attemptNumber = (context.attemptId % Int.MaxValue).toInt @@ -664,7 +658,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( val writer = new HadoopWriter(conf) writer.preSetup() - def writeToFile(context: TaskContext, iter: Iterator[(K,V)]) { + def writeToFile(context: TaskContext, iter: Iterator[Product2[K,V]]) { // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it // around by taking a mod. We expect that no task will be attempted 2 billion times. 
val attemptNumber = (context.attemptId % Int.MaxValue).toInt @@ -703,54 +697,6 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( private[spark] def getValueClass() = implicitly[ClassManifest[V]].erasure } -/** - * Extra functions available on RDDs of (key, value) pairs where the key is sortable through - * an implicit conversion. Import `spark.SparkContext._` at the top of your program to use these - * functions. They will work with any key type that has a `scala.math.Ordered` implementation. - */ -class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( - self: RDD[(K, V)]) - extends Logging - with Serializable { - - /** - * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling - * `collect` or `save` on the resulting RDD will return or output an ordered list of records - * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in - * order of the keys). - */ - def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size): RDD[(K,V)] = { - val shuffled = - new ShuffledRDD[K, V](self, new RangePartitioner(numPartitions, self, ascending)) - shuffled.mapPartitions(iter => { - val buf = iter.toArray - if (ascending) { - buf.sortWith((x, y) => x._1 < y._1).iterator - } else { - buf.sortWith((x, y) => x._1 > y._1).iterator - } - }, true) - } -} - -private[spark] -class MappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => U) extends RDD[(K, U)](prev) { - override def getPartitions = firstParent[(K, V)].partitions - override val partitioner = firstParent[(K, V)].partitioner - override def compute(split: Partition, context: TaskContext) = - firstParent[(K, V)].iterator(split, context).map{ case (k, v) => (k, f(v)) } -} - -private[spark] -class FlatMappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => TraversableOnce[U]) - extends RDD[(K, U)](prev) { - - override def getPartitions = firstParent[(K, V)].partitions - override val partitioner = firstParent[(K, V)].partitioner - override def compute(split: Partition, context: TaskContext) = { - firstParent[(K, V)].iterator(split, context).flatMap { case (k, v) => f(v).map(x => (k, x)) } - } -} private[spark] object Manifests { val seqSeqManifest = classManifest[Seq[Seq[_]]] diff --git a/core/src/main/scala/spark/Partitioner.scala b/core/src/main/scala/spark/Partitioner.scala index 6035bc075e..65da8235d7 100644 --- a/core/src/main/scala/spark/Partitioner.scala +++ b/core/src/main/scala/spark/Partitioner.scala @@ -84,7 +84,7 @@ class HashPartitioner(partitions: Int) extends Partitioner { */ class RangePartitioner[K <% Ordered[K]: ClassManifest, V]( partitions: Int, - @transient rdd: RDD[(K,V)], + @transient rdd: RDD[_ <: Product2[K,V]], private val ascending: Boolean = true) extends Partitioner { diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 503ea6ccbf..04b37df212 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -287,7 +287,10 @@ abstract class RDD[T: ClassManifest]( def coalesce(numPartitions: Int, shuffle: Boolean = false): RDD[T] = { if (shuffle) { // include a shuffle step so that our upstream tasks are still distributed - new CoalescedRDD(new ShuffledRDD(map(x => (x, null)), new HashPartitioner(numPartitions)), numPartitions).keys + new CoalescedRDD( + new ShuffledRDD(map(x => (x, null)), + new HashPartitioner(numPartitions)), + numPartitions).keys } else { new CoalescedRDD(this, numPartitions) } diff --git 
a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 80c65dfebd..c049bd3fa9 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -60,7 +60,8 @@ import org.apache.mesos.MesosNativeLibrary import spark.deploy.{LocalSparkCluster, SparkHadoopUtil} import spark.partial.{ApproximateEvaluator, PartialResult} -import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD} +import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD, + OrderedRDDFunctions} import spark.scheduler.{DAGScheduler, DAGSchedulerSource, ResultTask, ShuffleMapTask, SparkListener, SplitInfo, Stage, StageInfo, TaskScheduler, ActiveJob} import spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend, @@ -833,11 +834,11 @@ class SparkContext( /** Default min number of partitions for Hadoop RDDs when not given by user */ def defaultMinSplits: Int = math.min(defaultParallelism, 2) - private var nextShuffleId = new AtomicInteger(0) + private val nextShuffleId = new AtomicInteger(0) private[spark] def newShuffleId(): Int = nextShuffleId.getAndIncrement() - private var nextRddId = new AtomicInteger(0) + private val nextRddId = new AtomicInteger(0) /** Register a new RDD, returning its RDD ID */ private[spark] def newRddId(): Int = nextRddId.getAndIncrement() @@ -886,7 +887,7 @@ object SparkContext { implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( rdd: RDD[(K, V)]) = - new OrderedRDDFunctions(rdd) + new OrderedRDDFunctions(rdd.asInstanceOf[RDD[Product2[K, V]]]) implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = new DoubleRDDFunctions(rdd) diff --git a/core/src/main/scala/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/spark/api/java/JavaPairRDD.scala index c2995b836a..f5632428e7 100644 --- a/core/src/main/scala/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/spark/api/java/JavaPairRDD.scala @@ -30,17 +30,18 @@ import org.apache.hadoop.mapred.OutputFormat import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.hadoop.conf.Configuration -import spark.api.java.function.{Function2 => JFunction2} -import spark.api.java.function.{Function => JFunction} -import spark.partial.BoundedDouble -import spark.partial.PartialResult -import spark.OrderedRDDFunctions -import spark.storage.StorageLevel import spark.HashPartitioner import spark.Partitioner import spark.Partitioner._ import spark.RDD import spark.SparkContext.rddToPairRDDFunctions +import spark.api.java.function.{Function2 => JFunction2} +import spark.api.java.function.{Function => JFunction} +import spark.partial.BoundedDouble +import spark.partial.PartialResult +import spark.rdd.OrderedRDDFunctions +import spark.storage.StorageLevel + class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManifest[K], implicit val vManifest: ClassManifest[V]) extends JavaRDDLike[(K, V), JavaPairRDD[K, V]] { diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala index c2d95dc060..06e15bb73c 100644 --- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala @@ -60,7 +60,7 @@ class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]) * @param rdds parent RDDs. * @param part partitioner used to partition the shuffle output. 
*/ -class CoGroupedRDD[K](@transient var rdds: Seq[RDD[(K, _)]], part: Partitioner) +class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: Partitioner) extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) { private var serializerClass: String = null @@ -71,13 +71,13 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[(K, _)]], part: Partitioner) } override def getDependencies: Seq[Dependency[_]] = { - rdds.map { rdd: RDD[(K, _)] => + rdds.map { rdd: RDD[_ <: Product2[K, _]] => if (rdd.partitioner == Some(part)) { logInfo("Adding one-to-one dependency with " + rdd) new OneToOneDependency(rdd) } else { logInfo("Adding shuffle dependency with " + rdd) - new ShuffleDependency[Any, Any](rdd.asInstanceOf[RDD[(Any, Any)]], part, serializerClass) + new ShuffleDependency[Any, Any](rdd, part, serializerClass) } } } diff --git a/core/src/main/scala/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/spark/rdd/ShuffledRDD.scala index bcf7d0d89c..2eac62f9c0 100644 --- a/core/src/main/scala/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/spark/rdd/ShuffledRDD.scala @@ -17,9 +17,7 @@ package spark.rdd -import spark._ -import scala.Some -import scala.Some +import spark.{Dependency, Partitioner, RDD, SparkEnv, ShuffleDependency, Partition, TaskContext} private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { @@ -35,7 +33,7 @@ private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { * @tparam V the value class. */ class ShuffledRDD[K, V]( - @transient var prev: RDD[(K, V)], + @transient var prev: RDD[_ <: Product2[K, V]], part: Partitioner) extends RDD[(K, V)](prev.context, Nil) { diff --git a/core/src/main/scala/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/spark/rdd/SubtractedRDD.scala index 46b8cafaac..200e85d432 100644 --- a/core/src/main/scala/spark/rdd/SubtractedRDD.scala +++ b/core/src/main/scala/spark/rdd/SubtractedRDD.scala @@ -47,8 +47,8 @@ import spark.OneToOneDependency * out of memory because of the size of `rdd2`. 
*/ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassManifest]( - @transient var rdd1: RDD[(K, V)], - @transient var rdd2: RDD[(K, W)], + @transient var rdd1: RDD[_ <: Product2[K, V]], + @transient var rdd2: RDD[_ <: Product2[K, W]], part: Partitioner) extends RDD[(K, V)](rdd1.context, Nil) { @@ -66,7 +66,7 @@ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassM new OneToOneDependency(rdd) } else { logInfo("Adding shuffle dependency with " + rdd) - new ShuffleDependency(rdd.asInstanceOf[RDD[(K, Any)]], part, serializerClass) + new ShuffleDependency(rdd, part, serializerClass) } } } diff --git a/tools/src/main/scala/spark/tools/JavaAPICompletenessChecker.scala b/tools/src/main/scala/spark/tools/JavaAPICompletenessChecker.scala index 30fded12f0..b22e6c25df 100644 --- a/tools/src/main/scala/spark/tools/JavaAPICompletenessChecker.scala +++ b/tools/src/main/scala/spark/tools/JavaAPICompletenessChecker.scala @@ -17,13 +17,15 @@ package spark.tools -import spark._ import java.lang.reflect.Method + import scala.collection.mutable.ArrayBuffer + +import spark._ import spark.api.java._ +import spark.rdd.OrderedRDDFunctions import spark.streaming.{PairDStreamFunctions, DStream, StreamingContext} import spark.streaming.api.java.{JavaPairDStream, JavaDStream, JavaStreamingContext} -import scala.Tuple2 private[spark] abstract class SparkType(val name: String) From 2a7b99c08b29d3002183a8d7ed3acd14fbf5dc41 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 18 Aug 2013 20:39:29 -0700 Subject: [PATCH 072/136] Added the missing RDD files and cleaned up SparkContext. --- core/src/main/scala/spark/SparkContext.scala | 17 ++----- .../scala/spark/rdd/FlatMappedValuesRDD.scala | 36 +++++++++++++ .../scala/spark/rdd/MappedValuesRDD.scala | 34 +++++++++++++ .../scala/spark/rdd/OrderedRDDFunctions.scala | 51 +++++++++++++++++++ 4 files changed, 126 insertions(+), 12 deletions(-) create mode 100644 core/src/main/scala/spark/rdd/FlatMappedValuesRDD.scala create mode 100644 core/src/main/scala/spark/rdd/MappedValuesRDD.scala create mode 100644 core/src/main/scala/spark/rdd/OrderedRDDFunctions.scala diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index c049bd3fa9..5db1767146 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -20,19 +20,14 @@ package spark import java.io._ import java.net.URI import java.util.Properties -import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicInteger -import scala.collection.JavaConversions._ import scala.collection.Map import scala.collection.generic.Growable -import scala.collection.mutable.HashMap -import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.HashMap import scala.util.DynamicVariable -import scala.collection.mutable.{ConcurrentMap, HashMap} - -import akka.actor.Actor._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -54,7 +49,6 @@ import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.{Job => NewHadoopJob} import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat} -import org.apache.hadoop.security.UserGroupInformation import org.apache.mesos.MesosNativeLibrary @@ -63,15 +57,14 @@ import 
spark.partial.{ApproximateEvaluator, PartialResult} import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD, OrderedRDDFunctions} import spark.scheduler.{DAGScheduler, DAGSchedulerSource, ResultTask, ShuffleMapTask, SparkListener, - SplitInfo, Stage, StageInfo, TaskScheduler, ActiveJob} + SplitInfo, Stage, StageInfo, TaskScheduler} import spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend, ClusterScheduler, Schedulable, SchedulingMode} import spark.scheduler.local.LocalScheduler import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} import spark.storage.{StorageStatus, StorageUtils, RDDInfo, BlockManagerSource} +import spark.ui.SparkUI import spark.util.{MetadataCleaner, TimeStampedHashMap} -import ui.{SparkUI} -import spark.metrics._ /** * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark @@ -887,7 +880,7 @@ object SparkContext { implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( rdd: RDD[(K, V)]) = - new OrderedRDDFunctions(rdd.asInstanceOf[RDD[Product2[K, V]]]) + new OrderedRDDFunctions(rdd) implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = new DoubleRDDFunctions(rdd) diff --git a/core/src/main/scala/spark/rdd/FlatMappedValuesRDD.scala b/core/src/main/scala/spark/rdd/FlatMappedValuesRDD.scala new file mode 100644 index 0000000000..05fdfd82c1 --- /dev/null +++ b/core/src/main/scala/spark/rdd/FlatMappedValuesRDD.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.rdd + +import spark.{TaskContext, Partition, RDD} + + +private[spark] +class FlatMappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => TraversableOnce[U]) + extends RDD[(K, U)](prev) { + + override def getPartitions = firstParent[Product2[K, V]].partitions + + override val partitioner = firstParent[Product2[K, V]].partitioner + + override def compute(split: Partition, context: TaskContext) = { + firstParent[Product2[K, V]].iterator(split, context).flatMap { case (k, v) => + f(v).map(x => (k, x)) + } + } +} diff --git a/core/src/main/scala/spark/rdd/MappedValuesRDD.scala b/core/src/main/scala/spark/rdd/MappedValuesRDD.scala new file mode 100644 index 0000000000..21ae97daa9 --- /dev/null +++ b/core/src/main/scala/spark/rdd/MappedValuesRDD.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.rdd + + +import spark.{TaskContext, Partition, RDD} + +private[spark] +class MappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => U) + extends RDD[(K, U)](prev) { + + override def getPartitions = firstParent[Product2[K, U]].partitions + + override val partitioner = firstParent[Product2[K, U]].partitioner + + override def compute(split: Partition, context: TaskContext): Iterator[(K, U)] = { + firstParent[Product2[K, V]].iterator(split, context).map { case(k ,v) => (k, f(v)) } + } +} diff --git a/core/src/main/scala/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/spark/rdd/OrderedRDDFunctions.scala new file mode 100644 index 0000000000..6328c6a4ac --- /dev/null +++ b/core/src/main/scala/spark/rdd/OrderedRDDFunctions.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.rdd + +import spark.{RangePartitioner, Logging, RDD} + +/** + * Extra functions available on RDDs of (key, value) pairs where the key is sortable through + * an implicit conversion. Import `spark.SparkContext._` at the top of your program to use these + * functions. They will work with any key type that has a `scala.math.Ordered` implementation. + */ +class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( + self: RDD[_ <: Product2[K, V]]) + extends Logging with Serializable { + + /** + * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling + * `collect` or `save` on the resulting RDD will return or output an ordered list of records + * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in + * order of the keys). 
+ */ + def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size) + : RDD[(K, V)] = + { + val part = new RangePartitioner(numPartitions, self.asInstanceOf[RDD[Product2[K,V]]], ascending) + val shuffled = new ShuffledRDD[K, V](self, part) + shuffled.mapPartitions(iter => { + val buf = iter.toArray + if (ascending) { + buf.sortWith((x, y) => x._1 < y._1).iterator + } else { + buf.sortWith((x, y) => x._1 > y._1).iterator + } + }, preservesPartitioning = true) + } +} From 71d705a66eb8782e5cd5c77853fdd99fd8155334 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 19 Aug 2013 00:40:43 -0700 Subject: [PATCH 073/136] Made PairRDDFunctions taking only Tuple2, but made the rest of the shuffle code path working with general Product2. --- core/src/main/scala/spark/Aggregator.scala | 10 ++-- .../spark/BlockStoreShuffleFetcher.scala | 11 ++-- .../main/scala/spark/PairRDDFunctions.scala | 50 +++++++++---------- core/src/main/scala/spark/RDD.scala | 9 ++-- .../src/main/scala/spark/ShuffleFetcher.scala | 5 +- core/src/main/scala/spark/SparkContext.scala | 2 +- .../scala/spark/api/java/JavaPairRDD.scala | 2 +- .../main/scala/spark/rdd/CoGroupedRDD.scala | 12 ++--- .../scala/spark/rdd/FlatMappedValuesRDD.scala | 2 +- .../scala/spark/rdd/MappedValuesRDD.scala | 2 +- .../scala/spark/rdd/OrderedRDDFunctions.scala | 14 +++--- .../main/scala/spark/rdd/ShuffledRDD.scala | 12 ++--- .../main/scala/spark/rdd/SubtractedRDD.scala | 14 +++--- .../spark/scheduler/ShuffleMapTask.scala | 2 +- .../main/scala/spark/util/MutablePair.scala | 34 +++++++++++++ .../test/scala/spark/CheckpointSuite.scala | 2 +- .../scala/spark/PairRDDFunctionsSuite.scala | 7 +-- core/src/test/scala/spark/RDDSuite.scala | 2 +- core/src/test/scala/spark/ShuffleSuite.scala | 31 +++++++++--- .../tools/JavaAPICompletenessChecker.scala | 2 +- 20 files changed, 133 insertions(+), 92 deletions(-) create mode 100644 core/src/main/scala/spark/util/MutablePair.scala diff --git a/core/src/main/scala/spark/Aggregator.scala b/core/src/main/scala/spark/Aggregator.scala index 3920f8511c..9af401986d 100644 --- a/core/src/main/scala/spark/Aggregator.scala +++ b/core/src/main/scala/spark/Aggregator.scala @@ -34,12 +34,12 @@ case class Aggregator[K, V, C] ( def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]) : Iterator[(K, C)] = { val combiners = new JHashMap[K, C] - for ((k, v) <- iter) { - val oldC = combiners.get(k) + for (kv <- iter) { + val oldC = combiners.get(kv._1) if (oldC == null) { - combiners.put(k, createCombiner(v)) + combiners.put(kv._1, createCombiner(kv._2)) } else { - combiners.put(k, mergeValue(oldC, v)) + combiners.put(kv._1, mergeValue(oldC, kv._2)) } } combiners.iterator @@ -47,7 +47,7 @@ case class Aggregator[K, V, C] ( def combineCombinersByKey(iter: Iterator[(K, C)]) : Iterator[(K, C)] = { val combiners = new JHashMap[K, C] - for ((k, c) <- iter) { + iter.foreach { case(k, c) => val oldC = combiners.get(k) if (oldC == null) { combiners.put(k, c) diff --git a/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala b/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala index 8f6953b1f5..1ec95ed9b8 100644 --- a/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala +++ b/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala @@ -28,8 +28,9 @@ import spark.util.CompletionIterator private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Logging { - override def fetch[K, V]( - shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer) = { + override def 
fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer) + : Iterator[T] = + { logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId)) val blockManager = SparkEnv.get.blockManager @@ -49,12 +50,12 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin (address, splits.map(s => ("shuffle_%d_%d_%d".format(shuffleId, s._1, reduceId), s._2))) } - def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[(K, V)] = { + def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[T] = { val blockId = blockPair._1 val blockOption = blockPair._2 blockOption match { case Some(block) => { - block.asInstanceOf[Iterator[(K, V)]] + block.asInstanceOf[Iterator[T]] } case None => { val regex = "shuffle_([0-9]*)_([0-9]*)_([0-9]*)".r @@ -73,7 +74,7 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin val blockFetcherItr = blockManager.getMultiple(blocksByAddress, serializer) val itr = blockFetcherItr.flatMap(unpackBlock) - CompletionIterator[(K,V), Iterator[(K,V)]](itr, { + CompletionIterator[T, Iterator[T]](itr, { val shuffleMetrics = new ShuffleReadMetrics shuffleMetrics.shuffleFinishTime = System.currentTimeMillis shuffleMetrics.remoteFetchTime = blockFetcherItr.remoteFetchTime diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 3ae703ce1a..f8900d3921 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -48,7 +48,7 @@ import spark.Partitioner._ * Extra functions available on RDDs of (key, value) pairs through an implicit conversion. * Import `spark.SparkContext._` at the top of your program to use these functions. */ -class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Product2[K, V]]) +class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) extends Logging with HadoopMapReduceUtil with Serializable { @@ -85,13 +85,14 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) } else if (mapSideCombine) { val combined = self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) - val partitioned = new ShuffledRDD[K, C](combined, partitioner).setSerializer(serializerClass) + val partitioned = new ShuffledRDD[K, C, (K, C)](combined, partitioner) + .setSerializer(serializerClass) partitioned.mapPartitions(aggregator.combineCombinersByKey, preservesPartitioning = true) } else { // Don't apply map-side combiner. // A sanity check to make sure mergeCombiners is not defined. 
assert(mergeCombiners == null) - val values = new ShuffledRDD[K, V](self, partitioner).setSerializer(serializerClass) + val values = new ShuffledRDD[K, V, (K, V)](self, partitioner).setSerializer(serializerClass) values.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) } } @@ -162,7 +163,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc throw new SparkException("reduceByKeyLocally() does not support array keys") } - def reducePartition(iter: Iterator[Product2[K, V]]): Iterator[JHashMap[K, V]] = { + def reducePartition(iter: Iterator[(K, V)]): Iterator[JHashMap[K, V]] = { val map = new JHashMap[K, V] for ((k, v) <- iter) { val old = map.get(k) @@ -236,7 +237,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc if (getKeyClass().isArray && partitioner.isInstanceOf[HashPartitioner]) { throw new SparkException("Default partitioner cannot partition array keys.") } - new ShuffledRDD[K, V](self, partitioner) + new ShuffledRDD[K, V, (K, V)](self, partitioner) } /** @@ -245,9 +246,8 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc * (k, v2) is in `other`. Uses the given Partitioner to partition the output RDD. */ def join[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, W))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - for (v <- vs.iterator; w <- ws.iterator) yield (v, w) + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + for (v <- vs.iterator; w <- ws.iterator) yield (v, w) } } @@ -258,13 +258,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc * partition the output RDD. */ def leftOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, Option[W]))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - if (ws.isEmpty) { - vs.iterator.map(v => (v, None)) - } else { - for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w)) - } + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + if (ws.isEmpty) { + vs.iterator.map(v => (v, None)) + } else { + for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w)) + } } } @@ -276,13 +275,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc */ def rightOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner) : RDD[(K, (Option[V], W))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - if (vs.isEmpty) { - ws.iterator.map(w => (None, w)) - } else { - for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w) - } + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + if (vs.isEmpty) { + ws.iterator.map(w => (None, w)) + } else { + for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w) + } } } @@ -378,7 +376,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc val data = self.toArray() val map = new mutable.HashMap[K, V] map.sizeHint(data.length) - data.foreach { case(k, v) => map.put(k, v) } + data.foreach { case (k, v) => map.put(k, v) } map } @@ -501,7 +499,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc self.partitioner match { case Some(p) => val index = p.getPartition(key) - def process(it: Iterator[Product2[K, V]]): Seq[V] = { + def process(it: Iterator[(K, V)]): Seq[V] = { val buf = new ArrayBuffer[V] for ((k, v) <- it if k == key) { buf += v @@ -559,7 +557,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: 
RDD[_ <: Produc val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) val stageId = self.id - def writeShard(context: spark.TaskContext, iter: Iterator[Product2[K,V]]): Int = { + def writeShard(context: spark.TaskContext, iter: Iterator[(K,V)]): Int = { // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it // around by taking a mod. We expect that no task will be attempted 2 billion times. val attemptNumber = (context.attemptId % Int.MaxValue).toInt @@ -658,7 +656,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[_ <: Produc val writer = new HadoopWriter(conf) writer.preSetup() - def writeToFile(context: TaskContext, iter: Iterator[Product2[K,V]]) { + def writeToFile(context: TaskContext, iter: Iterator[(K, V)]) { // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it // around by taking a mod. We expect that no task will be attempted 2 billion times. val attemptNumber = (context.attemptId % Int.MaxValue).toInt diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 04b37df212..c9a044afab 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -31,9 +31,8 @@ import org.apache.hadoop.mapred.TextOutputFormat import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap} -import spark.api.java.JavaRDD -import spark.broadcast.Broadcast import spark.Partitioner._ +import spark.api.java.JavaRDD import spark.partial.BoundedDouble import spark.partial.CountEvaluator import spark.partial.GroupedCountEvaluator @@ -288,7 +287,7 @@ abstract class RDD[T: ClassManifest]( if (shuffle) { // include a shuffle step so that our upstream tasks are still distributed new CoalescedRDD( - new ShuffledRDD(map(x => (x, null)), + new ShuffledRDD[T, Null, (T, Null)](map(x => (x, null)), new HashPartitioner(numPartitions)), numPartitions).keys } else { @@ -305,8 +304,8 @@ abstract class RDD[T: ClassManifest]( def takeSample(withReplacement: Boolean, num: Int, seed: Int): Array[T] = { var fraction = 0.0 var total = 0 - var multiplier = 3.0 - var initialCount = this.count() + val multiplier = 3.0 + val initialCount = this.count() var maxSelected = 0 if (num < 0) { diff --git a/core/src/main/scala/spark/ShuffleFetcher.scala b/core/src/main/scala/spark/ShuffleFetcher.scala index dcced035e7..a6839cf7a4 100644 --- a/core/src/main/scala/spark/ShuffleFetcher.scala +++ b/core/src/main/scala/spark/ShuffleFetcher.scala @@ -22,12 +22,13 @@ import spark.serializer.Serializer private[spark] abstract class ShuffleFetcher { + /** * Fetch the shuffle outputs for a given ShuffleDependency. * @return An iterator over the elements of the fetched shuffle outputs. 
*/ - def fetch[K, V](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, - serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[(K,V)] + def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, + serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[T] /** Stop the fetcher */ def stop() {} diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 5db1767146..185c76366f 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -880,7 +880,7 @@ object SparkContext { implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( rdd: RDD[(K, V)]) = - new OrderedRDDFunctions(rdd) + new OrderedRDDFunctions[K, V, (K, V)](rdd) implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = new DoubleRDDFunctions(rdd) diff --git a/core/src/main/scala/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/spark/api/java/JavaPairRDD.scala index f5632428e7..effe6e5e0d 100644 --- a/core/src/main/scala/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/spark/api/java/JavaPairRDD.scala @@ -564,7 +564,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif override def compare(b: K) = comp.compare(a, b) } implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x) - fromRDD(new OrderedRDDFunctions(rdd).sortByKey(ascending)) + fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending)) } /** diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala index 06e15bb73c..01b6c23dcc 100644 --- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoGroupedRDD.scala @@ -73,10 +73,10 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: override def getDependencies: Seq[Dependency[_]] = { rdds.map { rdd: RDD[_ <: Product2[K, _]] => if (rdd.partitioner == Some(part)) { - logInfo("Adding one-to-one dependency with " + rdd) + logDebug("Adding one-to-one dependency with " + rdd) new OneToOneDependency(rdd) } else { - logInfo("Adding shuffle dependency with " + rdd) + logDebug("Adding shuffle dependency with " + rdd) new ShuffleDependency[Any, Any](rdd, part, serializerClass) } } @@ -122,15 +122,15 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: for ((dep, depNum) <- split.deps.zipWithIndex) dep match { case NarrowCoGroupSplitDep(rdd, _, itsSplit) => { // Read them from the parent - for ((k, v) <- rdd.iterator(itsSplit, context)) { - getSeq(k.asInstanceOf[K])(depNum) += v + rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, Any]]].foreach { kv => + getSeq(kv._1)(depNum) += kv._2 } } case ShuffleCoGroupSplitDep(shuffleId) => { // Read map outputs of shuffle val fetcher = SparkEnv.get.shuffleFetcher - fetcher.fetch[K, Any](shuffleId, split.index, context.taskMetrics, ser).foreach { - case (key, value) => getSeq(key)(depNum) += value + fetcher.fetch[Product2[K, Any]](shuffleId, split.index, context.taskMetrics, ser).foreach { + kv => getSeq(kv._1)(depNum) += kv._2 } } } diff --git a/core/src/main/scala/spark/rdd/FlatMappedValuesRDD.scala b/core/src/main/scala/spark/rdd/FlatMappedValuesRDD.scala index 05fdfd82c1..a6bdce89d8 100644 --- a/core/src/main/scala/spark/rdd/FlatMappedValuesRDD.scala +++ b/core/src/main/scala/spark/rdd/FlatMappedValuesRDD.scala @@ -29,7 +29,7 @@ class FlatMappedValuesRDD[K, V, U](prev: RDD[_ <: 
Product2[K, V]], f: V => Trave override val partitioner = firstParent[Product2[K, V]].partitioner override def compute(split: Partition, context: TaskContext) = { - firstParent[Product2[K, V]].iterator(split, context).flatMap { case (k, v) => + firstParent[Product2[K, V]].iterator(split, context).flatMap { case Product2(k, v) => f(v).map(x => (k, x)) } } diff --git a/core/src/main/scala/spark/rdd/MappedValuesRDD.scala b/core/src/main/scala/spark/rdd/MappedValuesRDD.scala index 21ae97daa9..8334e3b557 100644 --- a/core/src/main/scala/spark/rdd/MappedValuesRDD.scala +++ b/core/src/main/scala/spark/rdd/MappedValuesRDD.scala @@ -29,6 +29,6 @@ class MappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => U) override val partitioner = firstParent[Product2[K, U]].partitioner override def compute(split: Partition, context: TaskContext): Iterator[(K, U)] = { - firstParent[Product2[K, V]].iterator(split, context).map { case(k ,v) => (k, f(v)) } + firstParent[Product2[K, V]].iterator(split, context).map { case Product2(k ,v) => (k, f(v)) } } } diff --git a/core/src/main/scala/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/spark/rdd/OrderedRDDFunctions.scala index 6328c6a4ac..9154b76035 100644 --- a/core/src/main/scala/spark/rdd/OrderedRDDFunctions.scala +++ b/core/src/main/scala/spark/rdd/OrderedRDDFunctions.scala @@ -24,8 +24,10 @@ import spark.{RangePartitioner, Logging, RDD} * an implicit conversion. Import `spark.SparkContext._` at the top of your program to use these * functions. They will work with any key type that has a `scala.math.Ordered` implementation. */ -class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( - self: RDD[_ <: Product2[K, V]]) +class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, + V: ClassManifest, + P <: Product2[K, V] : ClassManifest]( + self: RDD[P]) extends Logging with Serializable { /** @@ -34,11 +36,9 @@ class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in * order of the keys). */ - def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size) - : RDD[(K, V)] = - { - val part = new RangePartitioner(numPartitions, self.asInstanceOf[RDD[Product2[K,V]]], ascending) - val shuffled = new ShuffledRDD[K, V](self, part) + def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size): RDD[P] = { + val part = new RangePartitioner(numPartitions, self, ascending) + val shuffled = new ShuffledRDD[K, V, P](self, part) shuffled.mapPartitions(iter => { val buf = iter.toArray if (ascending) { diff --git a/core/src/main/scala/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/spark/rdd/ShuffledRDD.scala index 2eac62f9c0..51c05af064 100644 --- a/core/src/main/scala/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/spark/rdd/ShuffledRDD.scala @@ -32,14 +32,14 @@ private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { * @tparam K the key class. * @tparam V the value class. 
*/ -class ShuffledRDD[K, V]( - @transient var prev: RDD[_ <: Product2[K, V]], +class ShuffledRDD[K, V, P <: Product2[K, V] : ClassManifest]( + @transient var prev: RDD[P], part: Partitioner) - extends RDD[(K, V)](prev.context, Nil) { + extends RDD[P](prev.context, Nil) { private var serializerClass: String = null - def setSerializer(cls: String): ShuffledRDD[K, V] = { + def setSerializer(cls: String): ShuffledRDD[K, V, P] = { serializerClass = cls this } @@ -54,9 +54,9 @@ class ShuffledRDD[K, V]( Array.tabulate[Partition](part.numPartitions)(i => new ShuffledRDDPartition(i)) } - override def compute(split: Partition, context: TaskContext): Iterator[(K, V)] = { + override def compute(split: Partition, context: TaskContext): Iterator[P] = { val shuffledId = dependencies.head.asInstanceOf[ShuffleDependency[K, V]].shuffleId - SparkEnv.get.shuffleFetcher.fetch[K, V](shuffledId, split.index, context.taskMetrics, + SparkEnv.get.shuffleFetcher.fetch[P](shuffledId, split.index, context.taskMetrics, SparkEnv.get.serializerManager.get(serializerClass)) } diff --git a/core/src/main/scala/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/spark/rdd/SubtractedRDD.scala index 200e85d432..dadef5e17d 100644 --- a/core/src/main/scala/spark/rdd/SubtractedRDD.scala +++ b/core/src/main/scala/spark/rdd/SubtractedRDD.scala @@ -62,10 +62,10 @@ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassM override def getDependencies: Seq[Dependency[_]] = { Seq(rdd1, rdd2).map { rdd => if (rdd.partitioner == Some(part)) { - logInfo("Adding one-to-one dependency with " + rdd) + logDebug("Adding one-to-one dependency with " + rdd) new OneToOneDependency(rdd) } else { - logInfo("Adding shuffle dependency with " + rdd) + logDebug("Adding shuffle dependency with " + rdd) new ShuffleDependency(rdd, part, serializerClass) } } @@ -103,16 +103,14 @@ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassM seq } } - def integrate(dep: CoGroupSplitDep, op: ((K, V)) => Unit) = dep match { + def integrate(dep: CoGroupSplitDep, op: Product2[K, V] => Unit) = dep match { case NarrowCoGroupSplitDep(rdd, _, itsSplit) => { - for (t <- rdd.iterator(itsSplit, context)) - op(t.asInstanceOf[(K, V)]) + rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, V]]].foreach(op) } case ShuffleCoGroupSplitDep(shuffleId) => { - val iter = SparkEnv.get.shuffleFetcher.fetch(shuffleId, partition.index, + val iter = SparkEnv.get.shuffleFetcher.fetch[Product2[K, V]](shuffleId, partition.index, context.taskMetrics, serializer) - for (t <- iter) - op(t.asInstanceOf[(K, V)]) + iter.foreach(op) } } // the first dep is rdd1; add all values to the map diff --git a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala index e3bb6d1e60..121ff31121 100644 --- a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala @@ -148,7 +148,7 @@ private[spark] class ShuffleMapTask( // Write the map output to its associated buckets. 
for (elem <- rdd.iterator(split, taskContext)) { - val pair = elem.asInstanceOf[(Any, Any)] + val pair = elem.asInstanceOf[Product2[Any, Any]] val bucketId = dep.partitioner.getPartition(pair._1) buckets.writers(bucketId).write(pair) } diff --git a/core/src/main/scala/spark/util/MutablePair.scala b/core/src/main/scala/spark/util/MutablePair.scala new file mode 100644 index 0000000000..117218bf47 --- /dev/null +++ b/core/src/main/scala/spark/util/MutablePair.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package spark.util + + +/** A tuple of 2 elements. + * @param _1 Element 1 of this MutablePair + * @param _2 Element 2 of this MutablePair + */ +case class MutablePair[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T1, + @specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T2] + (var _1: T1,var _2: T2) + extends Product2[T1, T2] +{ + + override def toString = "(" + _1 + "," + _2 + ")" + + def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[T1, T2]] +} diff --git a/core/src/test/scala/spark/CheckpointSuite.scala b/core/src/test/scala/spark/CheckpointSuite.scala index a84c89e3c9..966dede2be 100644 --- a/core/src/test/scala/spark/CheckpointSuite.scala +++ b/core/src/test/scala/spark/CheckpointSuite.scala @@ -99,7 +99,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { test("ShuffledRDD") { testCheckpointing(rdd => { // Creating ShuffledRDD directly as PairRDDFunctions.combineByKey produces a MapPartitionedRDD - new ShuffledRDD(rdd.map(x => (x % 2, 1)), partitioner) + new ShuffledRDD[Int, Int, (Int, Int)](rdd.map(x => (x % 2, 1)), partitioner) }) } diff --git a/core/src/test/scala/spark/PairRDDFunctionsSuite.scala b/core/src/test/scala/spark/PairRDDFunctionsSuite.scala index b102eaf4e6..328b3b5497 100644 --- a/core/src/test/scala/spark/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/spark/PairRDDFunctionsSuite.scala @@ -21,16 +21,11 @@ import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashSet import org.scalatest.FunSuite -import org.scalatest.prop.Checkers -import org.scalacheck.Arbitrary._ -import org.scalacheck.Gen -import org.scalacheck.Prop._ import com.google.common.io.Files - -import spark.rdd.ShuffledRDD import spark.SparkContext._ + class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { test("groupByKey") { val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (2, 1))) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index cbddf4e523..75778de1cc 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -170,7 +170,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { // we can optionally shuffle to 
keep the upstream parallel val coalesced5 = data.coalesce(1, shuffle = true) - assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _]] != + assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _, _]] != null) } diff --git a/core/src/test/scala/spark/ShuffleSuite.scala b/core/src/test/scala/spark/ShuffleSuite.scala index c686b8cc5a..c319a57fdd 100644 --- a/core/src/test/scala/spark/ShuffleSuite.scala +++ b/core/src/test/scala/spark/ShuffleSuite.scala @@ -22,6 +22,9 @@ import org.scalatest.matchers.ShouldMatchers import spark.rdd.ShuffledRDD import spark.SparkContext._ +import spark.ShuffleSuite.NonJavaSerializableClass +import spark.util.MutablePair + class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { test("groupByKey without compression") { @@ -46,12 +49,12 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { val a = sc.parallelize(1 to 10, 2) val b = a.map { x => - (x, new ShuffleSuite.NonJavaSerializableClass(x * 2)) + (x, new NonJavaSerializableClass(x * 2)) } // If the Kryo serializer is not used correctly, the shuffle would fail because the // default Java serializer cannot handle the non serializable class. - val c = new ShuffledRDD(b, new HashPartitioner(NUM_BLOCKS)) - .setSerializer(classOf[spark.KryoSerializer].getName) + val c = new ShuffledRDD[Int, NonJavaSerializableClass, (Int, NonJavaSerializableClass)]( + b, new HashPartitioner(NUM_BLOCKS)).setSerializer(classOf[spark.KryoSerializer].getName) val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId assert(c.count === 10) @@ -68,12 +71,12 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { sc = new SparkContext("local-cluster[2,1,512]", "test") val a = sc.parallelize(1 to 10, 2) val b = a.map { x => - (x, new ShuffleSuite.NonJavaSerializableClass(x * 2)) + (x, new NonJavaSerializableClass(x * 2)) } // If the Kryo serializer is not used correctly, the shuffle would fail because the // default Java serializer cannot handle the non serializable class. - val c = new ShuffledRDD(b, new HashPartitioner(3)) - .setSerializer(classOf[spark.KryoSerializer].getName) + val c = new ShuffledRDD[Int, NonJavaSerializableClass, (Int, NonJavaSerializableClass)]( + b, new HashPartitioner(3)).setSerializer(classOf[spark.KryoSerializer].getName) assert(c.count === 10) } @@ -88,7 +91,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { // NOTE: The default Java serializer doesn't create zero-sized blocks. 
// So, use Kryo - val c = new ShuffledRDD(b, new HashPartitioner(10)) + val c = new ShuffledRDD[Int, Int, (Int, Int)](b, new HashPartitioner(10)) .setSerializer(classOf[spark.KryoSerializer].getName) val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId @@ -114,7 +117,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { val b = a.map(x => (x, x*2)) // NOTE: The default Java serializer should create zero-sized blocks - val c = new ShuffledRDD(b, new HashPartitioner(10)) + val c = new ShuffledRDD[Int, Int, (Int, Int)](b, new HashPartitioner(10)) val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId assert(c.count === 4) @@ -128,6 +131,18 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { // We should have at most 4 non-zero sized partitions assert(nonEmptyBlocks.size <= 4) } + + test("shuffle using mutable pairs") { + // Use a local cluster with 2 processes to make sure there are both local and remote blocks + sc = new SparkContext("local-cluster[2,1,512]", "test") + def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) + val data = Array(p(1, 1), p(1, 2), p(1, 3), p(2, 1)) + val pairs: RDD[MutablePair[Int, Int]] = sc.parallelize(data) + val results = new ShuffledRDD[Int, Int, MutablePair[Int, Int]](pairs, new HashPartitioner(2)) + .collect() + + data.foreach { pair => results should contain (pair) } + } } object ShuffleSuite { diff --git a/tools/src/main/scala/spark/tools/JavaAPICompletenessChecker.scala b/tools/src/main/scala/spark/tools/JavaAPICompletenessChecker.scala index b22e6c25df..f45d0b281c 100644 --- a/tools/src/main/scala/spark/tools/JavaAPICompletenessChecker.scala +++ b/tools/src/main/scala/spark/tools/JavaAPICompletenessChecker.scala @@ -338,7 +338,7 @@ object JavaAPICompletenessChecker { println() println("Missing OrderedRDD methods") - printMissingMethods(classOf[OrderedRDDFunctions[_, _]], classOf[JavaPairRDD[_, _]]) + printMissingMethods(classOf[OrderedRDDFunctions[_, _, _]], classOf[JavaPairRDD[_, _]]) println() println("Missing SparkContext methods") From acc4aa1f4701235be6eae25a9b940f36a87ea685 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 19 Aug 2013 11:02:10 -0700 Subject: [PATCH 074/136] Added a test for sorting using MutablePair's. 
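The test added below drives the sorting path from the previous patch end to end. For orientation, a minimal sketch of the same call pattern in user code (illustrative only: it assumes an already-constructed SparkContext named `sc`, which these patches do not show):

    import spark.rdd.OrderedRDDFunctions
    import spark.util.MutablePair

    // Build a pair RDD out of mutable pairs and sort it through the new
    // Product2-based shuffle path (ShuffledRDD[K, V, P] under the hood).
    val data   = Array(MutablePair(3, 33), MutablePair(1, 11), MutablePair(2, 22))
    val pairs  = sc.parallelize(data, 2)
    val sorted = new OrderedRDDFunctions[Int, Int, MutablePair[Int, Int]](pairs)
      .sortByKey()
      .collect()
    // sorted is now in key order: (1,11), (2,22), (3,33)

Reusing MutablePair records this way avoids allocating a fresh Tuple2 per element during the shuffle, which is the motivation for keeping the shuffle code path generic over Product2.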
--- core/src/test/scala/spark/ShuffleSuite.scala | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/spark/ShuffleSuite.scala b/core/src/test/scala/spark/ShuffleSuite.scala index c319a57fdd..f1361546a3 100644 --- a/core/src/test/scala/spark/ShuffleSuite.scala +++ b/core/src/test/scala/spark/ShuffleSuite.scala @@ -20,9 +20,10 @@ package spark import org.scalatest.FunSuite import org.scalatest.matchers.ShouldMatchers -import spark.rdd.ShuffledRDD import spark.SparkContext._ import spark.ShuffleSuite.NonJavaSerializableClass +import spark.rdd.OrderedRDDFunctions +import spark.rdd.ShuffledRDD import spark.util.MutablePair @@ -137,12 +138,27 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { sc = new SparkContext("local-cluster[2,1,512]", "test") def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) val data = Array(p(1, 1), p(1, 2), p(1, 3), p(2, 1)) - val pairs: RDD[MutablePair[Int, Int]] = sc.parallelize(data) + val pairs: RDD[MutablePair[Int, Int]] = sc.parallelize(data, 2) val results = new ShuffledRDD[Int, Int, MutablePair[Int, Int]](pairs, new HashPartitioner(2)) .collect() data.foreach { pair => results should contain (pair) } } + + test("sorting using mutable pairs") { + // This is not in SortingSuite because of the local cluster setup. + // Use a local cluster with 2 processes to make sure there are both local and remote blocks + sc = new SparkContext("local-cluster[2,1,512]", "test") + def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) + val data = Array(p(1, 11), p(3, 33), p(100, 100), p(2, 22)) + val pairs: RDD[MutablePair[Int, Int]] = sc.parallelize(data, 2) + val results = new OrderedRDDFunctions[Int, Int, MutablePair[Int, Int]](pairs) + .sortByKey().collect() + results(0) should be (p(1, 11)) + results(1) should be (p(2, 22)) + results(2) should be (p(3, 33)) + results(3) should be (p(100, 100)) + } } object ShuffleSuite { From 6f6944c8079bffdd088ddb0a84fbf83356e294ea Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Mon, 19 Aug 2013 12:33:13 -0700 Subject: [PATCH 075/136] Update SBT build to use simpler fix for Hadoop 0.23.9 --- project/SparkBuild.scala | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index cea982b886..282b0cbed5 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -190,22 +190,13 @@ object SparkBuild extends Build { "io.netty" % "netty-all" % "4.0.0.Beta2", "org.apache.derby" % "derby" % "10.4.2.0" % "test", "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), + "org.apache.avro" % "avro" % "1.7.4", + "org.apache.avro" % "avro-ipc" % "1.7.4" excludeAll(excludeNetty), "com.codahale.metrics" % "metrics-core" % "3.0.0", "com.codahale.metrics" % "metrics-jvm" % "3.0.0", "com.codahale.metrics" % "metrics-json" % "3.0.0", "com.twitter" % "chill_2.9.3" % "0.3.1", "com.twitter" % "chill-java" % "0.3.1" - ) ++ ( - if (isYarnMode) { - // This kludge is needed for 0.23.x - Seq( - "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), - "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm) - ) - } else { - Seq() - } ) ) ++ assemblySettings ++ extraAssemblySettings From 
5054abd41b4bac4b7c8159dc23c7ee15aeb7ef2a Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 19 Aug 2013 12:58:02 -0700 Subject: [PATCH 076/136] Code review feedback. (added tests for cogroup and substract; added more documentation on MutablePair) --- .../main/scala/spark/PairRDDFunctions.scala | 4 +- .../main/scala/spark/util/MutablePair.scala | 16 +++---- core/src/test/scala/spark/ShuffleSuite.scala | 42 ++++++++++++++++++- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index f8900d3921..e7d4a7f562 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -165,7 +165,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) def reducePartition(iter: Iterator[(K, V)]): Iterator[JHashMap[K, V]] = { val map = new JHashMap[K, V] - for ((k, v) <- iter) { + iter.foreach { case (k, v) => val old = map.get(k) map.put(k, if (old == null) v else func(old, v)) } @@ -173,7 +173,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) } def mergeMaps(m1: JHashMap[K, V], m2: JHashMap[K, V]): JHashMap[K, V] = { - for ((k, v) <- m2) { + m2.foreach { case (k, v) => val old = m1.get(k) m1.put(k, if (old == null) v else func(old, v)) } diff --git a/core/src/main/scala/spark/util/MutablePair.scala b/core/src/main/scala/spark/util/MutablePair.scala index 117218bf47..3063806e83 100644 --- a/core/src/main/scala/spark/util/MutablePair.scala +++ b/core/src/main/scala/spark/util/MutablePair.scala @@ -18,17 +18,19 @@ package spark.util -/** A tuple of 2 elements. - * @param _1 Element 1 of this MutablePair - * @param _2 Element 2 of this MutablePair - */ +/** + * A tuple of 2 elements. This can be used as an alternative to Scala's Tuple2 when we want to + * minimize object allocation. 
+ * + * @param _1 Element 1 of this MutablePair + * @param _2 Element 2 of this MutablePair + */ case class MutablePair[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T1, @specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T2] - (var _1: T1,var _2: T2) + (var _1: T1, var _2: T2) extends Product2[T1, T2] { - override def toString = "(" + _1 + "," + _2 + ")" - def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[T1, T2]] + override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[T1, T2]] } diff --git a/core/src/test/scala/spark/ShuffleSuite.scala b/core/src/test/scala/spark/ShuffleSuite.scala index f1361546a3..8745689c70 100644 --- a/core/src/test/scala/spark/ShuffleSuite.scala +++ b/core/src/test/scala/spark/ShuffleSuite.scala @@ -22,8 +22,7 @@ import org.scalatest.matchers.ShouldMatchers import spark.SparkContext._ import spark.ShuffleSuite.NonJavaSerializableClass -import spark.rdd.OrderedRDDFunctions -import spark.rdd.ShuffledRDD +import spark.rdd.{SubtractedRDD, CoGroupedRDD, OrderedRDDFunctions, ShuffledRDD} import spark.util.MutablePair @@ -159,6 +158,45 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { results(2) should be (p(3, 33)) results(3) should be (p(100, 100)) } + + test("cogroup using mutable pairs") { + // Use a local cluster with 2 processes to make sure there are both local and remote blocks + sc = new SparkContext("local-cluster[2,1,512]", "test") + def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) + val data1 = Seq(p(1, 1), p(1, 2), p(1, 3), p(2, 1)) + val data2 = Seq(p(1, "11"), p(1, "12"), p(2, "22"), p(3, "3")) + val pairs1: RDD[MutablePair[Int, Int]] = sc.parallelize(data1, 2) + val pairs2: RDD[MutablePair[Int, String]] = sc.parallelize(data2, 2) + val results = new CoGroupedRDD[Int](Seq(pairs1, pairs2), new HashPartitioner(2)).collectAsMap() + + assert(results(1)(0).length === 3) + assert(results(1)(0).contains(1)) + assert(results(1)(0).contains(2)) + assert(results(1)(0).contains(3)) + assert(results(1)(1).length === 2) + assert(results(1)(1).contains("11")) + assert(results(1)(1).contains("12")) + assert(results(2)(0).length === 1) + assert(results(2)(0).contains(1)) + assert(results(2)(1).length === 1) + assert(results(2)(1).contains("22")) + assert(results(3)(0).length === 0) + assert(results(3)(1).contains("3")) + } + + test("subtract mutable pairs") { + // Use a local cluster with 2 processes to make sure there are both local and remote blocks + sc = new SparkContext("local-cluster[2,1,512]", "test") + def p[T1, T2](_1: T1, _2: T2) = MutablePair(_1, _2) + val data1 = Seq(p(1, 1), p(1, 2), p(1, 3), p(2, 1), p(3, 33)) + val data2 = Seq(p(1, "11"), p(1, "12"), p(2, "22")) + val pairs1: RDD[MutablePair[Int, Int]] = sc.parallelize(data1, 2) + val pairs2: RDD[MutablePair[Int, String]] = sc.parallelize(data2, 2) + val results = new SubtractedRDD(pairs1, pairs2, new HashPartitioner(2)).collect() + results should have length (1) + // substracted rdd return results as Tuple2 + results(0) should be ((3, 33)) + } } object ShuffleSuite { From 498a26189b197bdaf4be47e6a8baca7b97fe9064 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 19 Aug 2013 18:17:49 -0700 Subject: [PATCH 077/136] Small fixes to web UI: - Use SPARK_PUBLIC_DNS environment variable if set (for EC2) - Use a non-ephemeral port (3030 instead of 33000) by default - Updated test to use non-ephemeral port too --- core/src/main/scala/spark/ui/SparkUI.scala | 10 +++++----- core/src/test/scala/spark/ui/UISuite.scala | 2 +- 
ec2/spark_ec2.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/spark/ui/SparkUI.scala b/core/src/main/scala/spark/ui/SparkUI.scala index 1fd5a0989e..23ded44ba3 100644 --- a/core/src/main/scala/spark/ui/SparkUI.scala +++ b/core/src/main/scala/spark/ui/SparkUI.scala @@ -30,7 +30,7 @@ import spark.ui.JettyUtils._ /** Top level user interface for Spark */ private[spark] class SparkUI(sc: SparkContext) extends Logging { - val host = Utils.localHostName() + val host = Option(System.getenv("SPARK_PUBLIC_DNS")).getOrElse(Utils.localHostName()) val port = Option(System.getProperty("spark.ui.port")).getOrElse(SparkUI.DEFAULT_PORT).toInt var boundPort: Option[Int] = None var server: Option[Server] = None @@ -58,9 +58,9 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { server = Some(srv) boundPort = Some(usedPort) } catch { - case e: Exception => - logError("Failed to create Spark JettyUtils", e) - System.exit(1) + case e: Exception => + logError("Failed to create Spark JettyUtils", e) + System.exit(1) } } @@ -82,6 +82,6 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { } private[spark] object SparkUI { - val DEFAULT_PORT = "33000" + val DEFAULT_PORT = "3030" val STATIC_RESOURCE_DIR = "spark/ui/static" } diff --git a/core/src/test/scala/spark/ui/UISuite.scala b/core/src/test/scala/spark/ui/UISuite.scala index 56c1fed6ad..41cb0e0a35 100644 --- a/core/src/test/scala/spark/ui/UISuite.scala +++ b/core/src/test/scala/spark/ui/UISuite.scala @@ -24,7 +24,7 @@ import org.eclipse.jetty.server.Server class UISuite extends FunSuite { test("jetty port increases under contention") { - val startPort = 33333 + val startPort = 3030 val server = new Server(startPort) server.start() val (jettyServer1, boundPort1) = JettyUtils.startJettyServer("localhost", startPort, Seq()) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 740ec08542..4087c3bc2b 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -178,7 +178,7 @@ def launch_cluster(conn, opts, cluster_name): master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0') master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0') master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0') - master_group.authorize('tcp', 33000, 33010, '0.0.0.0/0') + master_group.authorize('tcp', 3030, 3030, '0.0.0.0/0') if opts.cluster_type == "mesos": master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0') if opts.ganglia: From 793a722f8e14552b8d36f46cca39d336dc2df9dd Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 19 Aug 2013 18:35:47 -0700 Subject: [PATCH 078/136] Allow some wiggle room in UISuite port test and in EC2 ports --- core/src/test/scala/spark/ui/UISuite.scala | 5 +++-- ec2/spark_ec2.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/spark/ui/UISuite.scala b/core/src/test/scala/spark/ui/UISuite.scala index 41cb0e0a35..735a794396 100644 --- a/core/src/test/scala/spark/ui/UISuite.scala +++ b/core/src/test/scala/spark/ui/UISuite.scala @@ -30,8 +30,9 @@ class UISuite extends FunSuite { val (jettyServer1, boundPort1) = JettyUtils.startJettyServer("localhost", startPort, Seq()) val (jettyServer2, boundPort2) = JettyUtils.startJettyServer("localhost", startPort, Seq()) - assert(boundPort1 === startPort + 1) - assert(boundPort2 === startPort + 2) + // Allow some wiggle room in case ports on the machine are under contention + assert(boundPort1 > startPort && boundPort1 < startPort + 10) + assert(boundPort2 > boundPort1 && boundPort2 < boundPort1 + 10) } 
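A rough sketch of the retry-upward behaviour these relaxed assertions allow for; it uses plain java.net sockets and is not the actual spark.ui.JettyUtils.startJettyServer implementation.

    import java.net.ServerSocket

    // Probe ports upward from `start` until one binds; a contended port simply
    // pushes the bound port a few numbers higher, which is all the test assumes.
    def firstFreePort(start: Int, maxTries: Int = 10): Int = {
      (start until start + maxTries).find { p =>
        try { new ServerSocket(p).close(); true }
        catch { case _: java.io.IOException => false }
      }.getOrElse(sys.error("no free port in [" + start + ", " + (start + maxTries) + ")"))
    }
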
test("jetty binds to port 0 correctly") { diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 4087c3bc2b..6127c10ced 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -178,7 +178,7 @@ def launch_cluster(conn, opts, cluster_name): master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0') master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0') master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0') - master_group.authorize('tcp', 3030, 3030, '0.0.0.0/0') + master_group.authorize('tcp', 3030, 3035, '0.0.0.0/0') if opts.cluster_type == "mesos": master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0') if opts.ganglia: From ad18410427190572f90754624469a7e806c78971 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Fri, 16 Aug 2013 12:14:52 -0700 Subject: [PATCH 079/136] Renamed 'priority' to 'jobId' and assorted minor changes --- .../scala/spark/scheduler/ActiveJob.scala | 2 +- .../scala/spark/scheduler/DAGScheduler.scala | 84 +++++++++---------- .../spark/scheduler/DAGSchedulerSource.scala | 2 +- .../scala/spark/scheduler/JobLogger.scala | 24 +++--- .../main/scala/spark/scheduler/Stage.scala | 7 +- 5 files changed, 60 insertions(+), 59 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/ActiveJob.scala b/core/src/main/scala/spark/scheduler/ActiveJob.scala index 71cc94edb6..fecc3e9648 100644 --- a/core/src/main/scala/spark/scheduler/ActiveJob.scala +++ b/core/src/main/scala/spark/scheduler/ActiveJob.scala @@ -25,7 +25,7 @@ import java.util.Properties * Tracks information about an active job in the DAGScheduler. */ private[spark] class ActiveJob( - val runId: Int, + val jobId: Int, val finalStage: Stage, val func: (TaskContext, Iterator[_]) => _, val partitions: Array[Int], diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 35b31f45a7..7823d0c8cf 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -104,11 +104,11 @@ class DAGScheduler( private val eventQueue = new LinkedBlockingQueue[DAGSchedulerEvent] - val nextRunId = new AtomicInteger(0) + val nextJobId = new AtomicInteger(0) val nextStageId = new AtomicInteger(0) - val idToStage = new TimeStampedHashMap[Int, Stage] + val stageIdToStage = new TimeStampedHashMap[Int, Stage] val shuffleToMapStage = new TimeStampedHashMap[Int, Stage] @@ -171,14 +171,14 @@ class DAGScheduler( /** * Get or create a shuffle map stage for the given shuffle dependency's map side. - * The priority value passed in will be used if the stage doesn't already exist with - * a lower priority (we assume that priorities always increase across jobs for now). + * The jobId value passed in will be used if the stage doesn't already exist with + * a lower jobId (jobId always increases across jobs.) */ - private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], priority: Int): Stage = { + private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], jobId: Int): Stage = { shuffleToMapStage.get(shuffleDep.shuffleId) match { case Some(stage) => stage case None => - val stage = newStage(shuffleDep.rdd, Some(shuffleDep), priority) + val stage = newStage(shuffleDep.rdd, Some(shuffleDep), jobId) shuffleToMapStage(shuffleDep.shuffleId) = stage stage } @@ -186,13 +186,13 @@ class DAGScheduler( /** * Create a Stage for the given RDD, either as a shuffle map stage (for a ShuffleDependency) or - * as a result stage for the final RDD used directly in an action. 
The stage will also be given - * the provided priority. + * as a result stage for the final RDD used directly in an action. The stage will also be + * associated with the provided jobId. */ private def newStage( rdd: RDD[_], shuffleDep: Option[ShuffleDependency[_,_]], - priority: Int, + jobId: Int, callSite: Option[String] = None) : Stage = { @@ -203,17 +203,17 @@ class DAGScheduler( mapOutputTracker.registerShuffle(shuffleDep.get.shuffleId, rdd.partitions.size) } val id = nextStageId.getAndIncrement() - val stage = new Stage(id, rdd, shuffleDep, getParentStages(rdd, priority), priority, callSite) - idToStage(id) = stage + val stage = new Stage(id, rdd, shuffleDep, getParentStages(rdd, jobId), jobId, callSite) + stageIdToStage(id) = stage stageToInfos(stage) = StageInfo(stage) stage } /** * Get or create the list of parent stages for a given RDD. The stages will be assigned the - * provided priority if they haven't already been created with a lower priority. + * provided jobId if they haven't already been created with a lower jobId. */ - private def getParentStages(rdd: RDD[_], priority: Int): List[Stage] = { + private def getParentStages(rdd: RDD[_], jobId: Int): List[Stage] = { val parents = new HashSet[Stage] val visited = new HashSet[RDD[_]] def visit(r: RDD[_]) { @@ -224,7 +224,7 @@ class DAGScheduler( for (dep <- r.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - parents += getShuffleMapStage(shufDep, priority) + parents += getShuffleMapStage(shufDep, jobId) case _ => visit(dep.rdd) } @@ -245,7 +245,7 @@ class DAGScheduler( for (dep <- rdd.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - val mapStage = getShuffleMapStage(shufDep, stage.priority) + val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { missing += mapStage } @@ -282,7 +282,7 @@ class DAGScheduler( val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val toSubmit = JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties) - return (toSubmit, waiter) + (toSubmit, waiter) } def runJob[T, U: ClassManifest]( @@ -329,8 +329,8 @@ class DAGScheduler( val listener = new ApproximateActionListener(rdd, func, evaluator, timeout) val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val partitions = (0 until rdd.partitions.size).toArray - eventQueue.put(JobSubmitted(rdd, func2, partitions, false, callSite, listener, properties)) - return listener.awaitResult() // Will throw an exception if the job fails + eventQueue.put(JobSubmitted(rdd, func2, partitions, allowLocal = false, callSite, listener, properties)) + listener.awaitResult() // Will throw an exception if the job fails } /** @@ -340,11 +340,11 @@ class DAGScheduler( private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = { event match { case JobSubmitted(finalRDD, func, partitions, allowLocal, callSite, listener, properties) => - val runId = nextRunId.getAndIncrement() - val finalStage = newStage(finalRDD, None, runId, Some(callSite)) - val job = new ActiveJob(runId, finalStage, func, partitions, callSite, listener, properties) + val jobId = nextJobId.getAndIncrement() + val finalStage = newStage(finalRDD, None, jobId, Some(callSite)) + val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties) clearCacheLocs() - logInfo("Got job " + job.runId + " (" + callSite + ") with " + partitions.length + + logInfo("Got job " + job.jobId + " (" + callSite + ") with " + partitions.length + " output partitions 
(allowLocal=" + allowLocal + ")") logInfo("Final stage: " + finalStage + " (" + finalStage.name + ")") logInfo("Parents of final stage: " + finalStage.parents) @@ -354,7 +354,7 @@ class DAGScheduler( runLocally(job) } else { listenerBus.post(SparkListenerJobStart(job, properties)) - idToActiveJob(runId) = job + idToActiveJob(jobId) = job activeJobs += job resultStageToJob(finalStage) = job submitStage(finalStage) @@ -375,7 +375,7 @@ class DAGScheduler( handleTaskCompletion(completion) case TaskSetFailed(taskSet, reason) => - abortStage(idToStage(taskSet.stageId), reason) + abortStage(stageIdToStage(taskSet.stageId), reason) case StopDAGScheduler => // Cancel any active jobs @@ -386,7 +386,7 @@ class DAGScheduler( } return true } - return false + false } /** @@ -398,7 +398,7 @@ class DAGScheduler( clearCacheLocs() val failed2 = failed.toArray failed.clear() - for (stage <- failed2.sortBy(_.priority)) { + for (stage <- failed2.sortBy(_.jobId)) { submitStage(stage) } } @@ -416,7 +416,7 @@ class DAGScheduler( logTrace("failed: " + failed) val waiting2 = waiting.toArray waiting.clear() - for (stage <- waiting2.sortBy(_.priority)) { + for (stage <- waiting2.sortBy(_.jobId)) { submitStage(stage) } } @@ -463,7 +463,7 @@ class DAGScheduler( */ protected def runLocally(job: ActiveJob) { logInfo("Computing the requested partition locally") - new Thread("Local computation of job " + job.runId) { + new Thread("Local computation of job " + job.jobId) { override def run() { runLocallyWithinThread(job) } @@ -531,7 +531,7 @@ class DAGScheduler( } // must be run listener before possible NotSerializableException // should be "StageSubmitted" first and then "JobEnded" - val properties = idToActiveJob(stage.priority).properties + val properties = idToActiveJob(stage.jobId).properties listenerBus.post(SparkListenerStageSubmitted(stage, tasks.size, properties)) if (tasks.size > 0) { @@ -552,7 +552,7 @@ class DAGScheduler( myPending ++= tasks logDebug("New pending tasks: " + myPending) taskSched.submitTasks( - new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.priority, properties)) + new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.jobId, properties)) if (!stage.submissionTime.isDefined) { stage.submissionTime = Some(System.currentTimeMillis()) } @@ -569,7 +569,7 @@ class DAGScheduler( */ private def handleTaskCompletion(event: CompletionEvent) { val task = event.task - val stage = idToStage(task.stageId) + val stage = stageIdToStage(task.stageId) def markStageAsFinished(stage: Stage) = { val serviceTime = stage.submissionTime match { @@ -598,7 +598,7 @@ class DAGScheduler( job.numFinished += 1 // If the whole job has finished, remove it if (job.numFinished == job.numPartitions) { - idToActiveJob -= stage.priority + idToActiveJob -= stage.jobId activeJobs -= job resultStageToJob -= stage markStageAsFinished(stage) @@ -635,7 +635,7 @@ class DAGScheduler( mapOutputTracker.registerMapOutputs( stage.shuffleDep.get.shuffleId, stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray, - true) + changeGeneration = true) } clearCacheLocs() if (stage.outputLocs.count(_ == Nil) != 0) { @@ -669,7 +669,7 @@ class DAGScheduler( case FetchFailed(bmAddress, shuffleId, mapId, reduceId) => // Mark the stage that the reducer was in as unrunnable - val failedStage = idToStage(task.stageId) + val failedStage = stageIdToStage(task.stageId) running -= failedStage failed += failedStage // TODO: Cancel running tasks in the stage @@ -697,7 +697,7 @@ class DAGScheduler( case other => // 
Unrecognized failure - abort all jobs depending on this stage - abortStage(idToStage(task.stageId), task + " failed: " + other) + abortStage(stageIdToStage(task.stageId), task + " failed: " + other) } } @@ -718,7 +718,7 @@ class DAGScheduler( for ((shuffleId, stage) <- shuffleToMapStage) { stage.removeOutputsOnExecutor(execId) val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray - mapOutputTracker.registerMapOutputs(shuffleId, locs, true) + mapOutputTracker.registerMapOutputs(shuffleId, locs, changeGeneration = true) } if (shuffleToMapStage.isEmpty) { mapOutputTracker.incrementEpoch() @@ -750,7 +750,7 @@ class DAGScheduler( val error = new SparkException("Job failed: " + reason) job.listener.jobFailed(error) listenerBus.post(SparkListenerJobEnd(job, JobFailed(error, Some(failedStage)))) - idToActiveJob -= resultStage.priority + idToActiveJob -= resultStage.jobId activeJobs -= job resultStageToJob -= resultStage } @@ -774,7 +774,7 @@ class DAGScheduler( for (dep <- rdd.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - val mapStage = getShuffleMapStage(shufDep, stage.priority) + val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { visitedStages += mapStage visit(mapStage.rdd) @@ -812,13 +812,13 @@ class DAGScheduler( } case _ => }) - return Nil + Nil } private def cleanup(cleanupTime: Long) { - var sizeBefore = idToStage.size - idToStage.clearOldValues(cleanupTime) - logInfo("idToStage " + sizeBefore + " --> " + idToStage.size) + var sizeBefore = stageIdToStage.size + stageIdToStage.clearOldValues(cleanupTime) + logInfo("stageIdToStage " + sizeBefore + " --> " + stageIdToStage.size) sizeBefore = shuffleToMapStage.size shuffleToMapStage.clearOldValues(cleanupTime) diff --git a/core/src/main/scala/spark/scheduler/DAGSchedulerSource.scala b/core/src/main/scala/spark/scheduler/DAGSchedulerSource.scala index 87d27cc70d..98c4fb7e59 100644 --- a/core/src/main/scala/spark/scheduler/DAGSchedulerSource.scala +++ b/core/src/main/scala/spark/scheduler/DAGSchedulerSource.scala @@ -21,7 +21,7 @@ private[spark] class DAGSchedulerSource(val dagScheduler: DAGScheduler) extends }) metricRegistry.register(MetricRegistry.name("job", "allJobs", "number"), new Gauge[Int] { - override def getValue: Int = dagScheduler.nextRunId.get() + override def getValue: Int = dagScheduler.nextJobId.get() }) metricRegistry.register(MetricRegistry.name("job", "activeJobs", "number"), new Gauge[Int] { diff --git a/core/src/main/scala/spark/scheduler/JobLogger.scala b/core/src/main/scala/spark/scheduler/JobLogger.scala index 7194fcaa49..1bc9fabdff 100644 --- a/core/src/main/scala/spark/scheduler/JobLogger.scala +++ b/core/src/main/scala/spark/scheduler/JobLogger.scala @@ -102,7 +102,7 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging { stageIDToJobID.get(stageID).foreach(jobID => jobLogInfo(jobID, info, withTime)) protected def buildJobDep(jobID: Int, stage: Stage) { - if (stage.priority == jobID) { + if (stage.jobId == jobID) { jobIDToStages.get(jobID) match { case Some(stageList) => stageList += stage case None => val stageList = new ListBuffer[Stage] @@ -178,12 +178,12 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging { }else{ stageInfo = "STAGE_ID=" + stage.id + " RESULT_STAGE" } - if (stage.priority == jobID) { + if (stage.jobId == jobID) { jobLogInfo(jobID, indentString(indent) + stageInfo, false) recordRddInStageGraph(jobID, stage.rdd, indent) 
stage.parents.foreach(recordStageDepGraph(jobID, _, indent + 2)) } else - jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.priority, false) + jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.jobId, false) } // Record task metrics into job log files @@ -260,7 +260,7 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging { override def onJobEnd(jobEnd: SparkListenerJobEnd) { val job = jobEnd.job - var info = "JOB_ID=" + job.runId + var info = "JOB_ID=" + job.jobId jobEnd.jobResult match { case JobSucceeded => info += " STATUS=SUCCESS" case JobFailed(exception, _) => @@ -268,8 +268,8 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging { exception.getMessage.split("\\s+").foreach(info += _ + "_") case _ => } - jobLogInfo(job.runId, info.substring(0, info.length - 1).toUpperCase) - closeLogWriter(job.runId) + jobLogInfo(job.jobId, info.substring(0, info.length - 1).toUpperCase) + closeLogWriter(job.jobId) } protected def recordJobProperties(jobID: Int, properties: Properties) { @@ -282,11 +282,11 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging { override def onJobStart(jobStart: SparkListenerJobStart) { val job = jobStart.job val properties = jobStart.properties - createLogWriter(job.runId) - recordJobProperties(job.runId, properties) - buildJobDep(job.runId, job.finalStage) - recordStageDep(job.runId) - recordStageDepGraph(job.runId, job.finalStage) - jobLogInfo(job.runId, "JOB_ID=" + job.runId + " STATUS=STARTED") + createLogWriter(job.jobId) + recordJobProperties(job.jobId, properties) + buildJobDep(job.jobId, job.finalStage) + recordStageDep(job.jobId) + recordStageDepGraph(job.jobId, job.finalStage) + jobLogInfo(job.jobId, "JOB_ID=" + job.jobId + " STATUS=STARTED") } } diff --git a/core/src/main/scala/spark/scheduler/Stage.scala b/core/src/main/scala/spark/scheduler/Stage.scala index 5428daeb94..c599c00ac4 100644 --- a/core/src/main/scala/spark/scheduler/Stage.scala +++ b/core/src/main/scala/spark/scheduler/Stage.scala @@ -33,15 +33,16 @@ import spark.storage.BlockManagerId * initiated a job (e.g. count(), save(), etc). For shuffle map stages, we also track the nodes * that each output partition is on. * - * Each Stage also has a priority, which is (by default) based on the job it was submitted in. - * This allows Stages from earlier jobs to be computed first or recovered faster on failure. + * Each Stage also has a jobId, identifying the job that first submitted the stage. When FIFO + * scheduling is used, this allows Stages from earlier jobs to be computed first or recovered + * faster on failure. 
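A minimal illustration of the ordering described here; StageStub is a hypothetical stand-in, not the real spark.scheduler.Stage, and the ids are invented.

    // Under FIFO scheduling, failed or waiting stages are resubmitted in
    // increasing jobId order, matching the sortBy(_.jobId) calls above.
    case class StageStub(stageId: Int, jobId: Int)

    val failedStages = Seq(StageStub(7, jobId = 3), StageStub(2, jobId = 1), StageStub(5, jobId = 2))
    val resubmitOrder = failedStages.sortBy(_.jobId).map(_.stageId)
    println(resubmitOrder)   // List(2, 5, 7): stages from earlier jobs recover first
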
*/ private[spark] class Stage( val id: Int, val rdd: RDD[_], val shuffleDep: Option[ShuffleDependency[_,_]], // Output shuffle if stage is a map stage val parents: List[Stage], - val priority: Int, + val jobId: Int, callSite: Option[String]) extends Logging { From 1630fbf838fff101e36324934a165f95fb324482 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Tue, 20 Aug 2013 00:17:16 -0700 Subject: [PATCH 080/136] changeGeneration --> changeEpoch renaming --- core/src/main/scala/spark/scheduler/DAGScheduler.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 7823d0c8cf..9402f18a0f 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -635,7 +635,7 @@ class DAGScheduler( mapOutputTracker.registerMapOutputs( stage.shuffleDep.get.shuffleId, stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray, - changeGeneration = true) + changeEpoch = true) } clearCacheLocs() if (stage.outputLocs.count(_ == Nil) != 0) { @@ -718,7 +718,7 @@ class DAGScheduler( for ((shuffleId, stage) <- shuffleToMapStage) { stage.removeOutputsOnExecutor(execId) val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray - mapOutputTracker.registerMapOutputs(shuffleId, locs, changeGeneration = true) + mapOutputTracker.registerMapOutputs(shuffleId, locs, changeEpoch = true) } if (shuffleToMapStage.isEmpty) { mapOutputTracker.incrementEpoch() From 07e5c8b69560e071d521bfeab1d86b5f68d5bb57 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 20 Aug 2013 15:49:52 -0700 Subject: [PATCH 081/136] Set default Hadoop version to 1 --- ec2/spark_ec2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 8080cc4172..182cbbbb7c 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -73,8 +73,8 @@ def parse_args(): parser.add_option("--spark-git-repo", default="https://github.com/mesos/spark", help="Github repo from which to checkout supplied commit hash") - parser.add_option("--hadoop-major-version", default="2", - help="Major version of Hadoop (default: 2)") + parser.add_option("--hadoop-major-version", default="1", + help="Major version of Hadoop (default: 1)") parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + From 1ede102ba5863f6cee27437b0adbc4d54cedffb3 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Tue, 13 Aug 2013 20:46:22 -0700 Subject: [PATCH 082/136] load balancing coalescer --- .../main/scala/spark/rdd/CoalescedRDD.scala | 207 +++++++++++++++++- core/src/test/scala/spark/RDDSuite.scala | 22 ++ 2 files changed, 218 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 2b5bf18541..09ae9a8fa6 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -19,11 +19,13 @@ package spark.rdd import spark.{Dependency, OneToOneDependency, NarrowDependency, RDD, Partition, TaskContext} import java.io.{ObjectOutputStream, IOException} +import scala.collection.mutable private[spark] case class CoalescedRDDPartition( index: Int, @transient rdd: RDD[_], - parentsIndices: Array[Int] + parentsIndices: Array[Int], + prefLoc: String = "" ) extends Partition { var parents: 
Seq[Partition] = parentsIndices.map(rdd.partitions(_)) @@ -33,6 +35,8 @@ private[spark] case class CoalescedRDDPartition( parents = parentsIndices.map(rdd.partitions(_)) oos.defaultWriteObject() } + + def getPreferredLocation = prefLoc } /** @@ -45,20 +49,24 @@ private[spark] case class CoalescedRDDPartition( */ class CoalescedRDD[T: ClassManifest]( @transient var prev: RDD[T], - maxPartitions: Int) + maxPartitions: Int, + balanceSlack: Double = 0.20 ) extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { - val prevSplits = prev.partitions - if (prevSplits.length < maxPartitions) { - prevSplits.map(_.index).map{idx => new CoalescedRDDPartition(idx, prev, Array(idx)) } - } else { - (0 until maxPartitions).map { i => - val rangeStart = ((i.toLong * prevSplits.length) / maxPartitions).toInt - val rangeEnd = (((i.toLong + 1) * prevSplits.length) / maxPartitions).toInt - new CoalescedRDDPartition(i, prev, (rangeStart until rangeEnd).toArray) - }.toArray + val res = mutable.ArrayBuffer[CoalescedRDDPartition]() + val targetLen = math.min(prev.partitions.length, maxPartitions) + val packer = new PartitionCoalescer(maxPartitions, prev, balanceSlack) + + packer.setupGroups(targetLen) // setup the groups (bins) and preferred locations + packer.throwBalls() // assign partitions (balls) to each group (bins) + + for ((pg, i) <- packer.groupArr.zipWithIndex) { + val ids = pg.list.map(_.index).toArray + res += new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) } + + res.toArray } override def compute(split: Partition, context: TaskContext): Iterator[T] = { @@ -79,3 +87,180 @@ class CoalescedRDD[T: ClassManifest]( prev = null } } + + +class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { + + // this class just keeps iterating and rotating infinitely over the partitions of the RDD + // next() returns the next preferred machine that a partition is replicated on + // the rotator first goes through the first replica copy of each partition, then second, then third + private class RotateLocations(prev: RDD[_]) extends Iterator[String] { + + private var it: Iterator[String] = resetIterator() + override val isEmpty = !it.hasNext + + // initializes/resets to start iterating from the beginning + private def resetIterator() = { + val i1 = prev.partitions.view.map( (p: Partition) => + { if (prev.preferredLocations(p).length > 0) Some(prev.preferredLocations(p)(0)) else None } ) + val i2 = prev.partitions.view.map( (p: Partition) => + { if (prev.preferredLocations(p).length > 1) Some(prev.preferredLocations(p)(1)) else None } ) + val i3 = prev.partitions.view.map( (p: Partition) => + { if (prev.preferredLocations(p).length > 2) Some(prev.preferredLocations(p)(2)) else None } ) + val res = List(i1,i2,i3) + res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) into one iterator + } + + // hasNext() is false iff there are no preferredLocations for any of the partitions of the RDD + def hasNext(): Boolean = !isEmpty + + // return the next preferredLocation of some partition of the RDD + def next(): String = { + if (it.hasNext) + it.next() + else { + it = resetIterator() // ran out of preferred locations, reset and rotate to the beginning + it.next() + } + } + } + + case class PartitionGroup(prefLoc: String = "") { + var list = mutable.MutableList[Partition]() + + def size = list.size + + // returns number of partitions that got locality in this group + def local = { + var loc: Int = 0 
+ list.foreach(p => if (prev.preferredLocations(p).contains(prefLoc)) loc += 1) + loc + } + + override def toString(): String = { + val localityStr = if (size == 0) "0" else (local*100. / size).toInt.toString + "PartitionGroup(\"" + prefLoc + "\") size: " + size + " locality: " + localityStr +"% \n" + // list.map("\t\t" + _.toString).mkString("\n") + "\n" + } + } + + def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size + def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = + if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get) + + val rnd = new scala.util.Random(7919) // keep this class deterministic + + // each element of groupArr represents one coalesced partition + val groupArr = mutable.ArrayBuffer[PartitionGroup]() + + // hash used to check whether some machine is already in groupArr + val groupHash = mutable.Map[String, mutable.MutableList[PartitionGroup]]() + + // determines the tradeoff between load-balancing the partitions sizes and their locality + // e.g. balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality + val slack = (balanceSlack * maxPartitions).toInt + + private var noLocality = true // if true if no preferredLocations exists for parent RDD + + /** + * Sorts and gets the least element of the list associated with key in groupHash + * The returned PartitionGroup is the least loaded of all groups that represent the machine "key" + * @param key string representing a partitioned group on preferred machine key + * @return Option of PartitionGroup that has least elements for key + */ + private def getLeastGroupHash(key: String): Option[PartitionGroup] = { + groupHash.get(key).map(_.sortWith(compare).head) + } + + /** + * Initializes targetLen partition groups and assigns a preferredLocation + * This uses coupon collector to estimate how many preferredLocations it must rotate through until it has seen + * most of the preferred locations (2 * n log(n)) + * @param targetLen + */ + def setupGroups(targetLen: Int) { + val rotIt = new RotateLocations(prev) + + // deal with empty rotator case, just create targetLen partition groups with no preferred location + if (!rotIt.hasNext()) { + (1 to targetLen).foreach(x => groupArr += PartitionGroup()) + return + } + + noLocality = false + + // number of iterations needed to be certain that we've seen most preferred locations + val expectedCoupons2 = 2 * (math.log(targetLen)*targetLen + targetLen + 0.5).toInt + var numCreated = 0 + var tries = 0 + + // rotate through until either targetLen unique/distinct preferred locations have been created OR + // we've rotated expectedCoupons2, in which case we have likely seen all preferred locations, i.e. 
+ // likely targetLen >> number of preferred locations (more buckets than there are machines) + while (numCreated < targetLen && tries < expectedCoupons2) { + tries += 1 + val nxt = rotIt.next() + if (!groupHash.contains(nxt)) { + val pgroup = PartitionGroup(nxt) + groupArr += pgroup + groupHash += (nxt -> (mutable.MutableList(pgroup))) // use list in case we have multiple groups for same machine + numCreated += 1 + } + } + + while (numCreated < targetLen) { // if we don't have enough partition groups, just create duplicates + val nxt = rotIt.next() + val pgroup = PartitionGroup(nxt) + groupArr += pgroup + groupHash.get(nxt).get += pgroup + numCreated += 1 + } + } + + /** + * Takes a parent RDD partition and decides which of the partition groups to put it in + * Takes locality into account, but also uses power of 2 choices to load balance + * It strikes a balance between the two use the balanceSlack variable + * @param p partition (ball to be thrown) + * @return partition group (bin to be put in) + */ + def pickBin(p: Partition): PartitionGroup = { + val pref = prev.preferredLocations(p).map(getLeastGroupHash(_)).sortWith(compare) // least loaded bin of replicas + val prefPart = if (pref == Nil) None else pref.head + + val r1 = rnd.nextInt(groupArr.size) + val r2 = rnd.nextInt(groupArr.size) + val minPowerOfTwo = if (groupArr(r1).size < groupArr(r2).size) groupArr(r1) else groupArr(r2) // power of 2 + if (prefPart== None) // if no preferred locations, just use basic power of two + return minPowerOfTwo + + val prefPartActual = prefPart.get + + if (minPowerOfTwo.size + slack <= prefPartActual.size) // more imbalance than the slack allows + return minPowerOfTwo // prefer balance over locality + else { + return prefPartActual // prefer locality over balance + } + } + + def throwBalls() { + if (noLocality) { // no preferredLocations in parent RDD, no randomization needed + if (maxPartitions > groupArr.size) { // just return prev.partitions + for ((p,i) <- prev.partitions.zipWithIndex) { + groupArr(i).list += p + } + } else { // old code, just splits them grouping partitions that are next to each other in the array + (0 until maxPartitions).foreach { i => + val rangeStart = ((i.toLong * prev.partitions.length) / maxPartitions).toInt + val rangeEnd = (((i.toLong + 1) * prev.partitions.length) / maxPartitions).toInt + (rangeStart until rangeEnd).foreach{ j => groupArr(i).list += prev.partitions(j) } + } + } + } else { + for (p <- prev.partitions) { // throw every partition (ball) into a partition group (bin) + pickBin(p).list += p + } + } + } + +} diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 75778de1cc..881bdedfe5 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -173,6 +173,28 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _, _]] != null) } + test("cogrouped RDDs with locality") { + // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5 + val data = sc.makeRDD((1 to 9).map( i => (i, (i to (i+2)).map{ j => "m" + (j%6)} ))) + val coalesced1 = data.coalesce(3) + assert(coalesced1.collect().toList.sorted === (1 to 9).toList) // no data lost (NB: order might reshuffle) + + val splits = coalesced1.glom().collect().map(_.toList).toList + assert(splits.length === 3) // ensure it indeed created 3 partitions + + assert(splits.foldLeft(true)( (x,y) => if (!x) false 
else y.length >= 2) === true) // descent balance (2+ per bin) + + val prefs = List(List("m1","m2","m3"), List("m4","m5","m6")) + val data2 = sc.makeRDD((1 to 100).map( i => (i, prefs(i % 2) ))) // alternate machine prefs + val coalesced2 = data2.coalesce(10) + val splits2 = coalesced2.glom().collect().map(_.toList).toList + + // this gives a list of pairs, each pair is of the form (even,odd), where even is the number of even elements... + val list = splits2.map( ls => ls.foldLeft((0,0))( (x,y) => if (y % 2 == 0) (x._1+1,x._2) else (x._1,x._2+1)) ) + val maxes = list.map( { case (a,b) => if (a>b) a else b } ) // get the maxs, this represents the locality + maxes.foreach( locality => assert( locality > 7) ) // at least 70% locality in each partition + + } test("zipped RDDs") { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) From 66edf854aa585d23e47fc0bfb7fdd4e23c0ea592 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Wed, 14 Aug 2013 11:15:39 -0700 Subject: [PATCH 083/136] Bug, should compute slack wrt parent partition size, not number of bins --- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 09ae9a8fa6..f999e9b0ec 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -50,7 +50,7 @@ private[spark] case class CoalescedRDDPartition( class CoalescedRDD[T: ClassManifest]( @transient var prev: RDD[T], maxPartitions: Int, - balanceSlack: Double = 0.20 ) + balanceSlack: Double = 0.10 ) extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { @@ -158,7 +158,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) // determines the tradeoff between load-balancing the partitions sizes and their locality // e.g. balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality - val slack = (balanceSlack * maxPartitions).toInt + val slack = (balanceSlack * prev.partitions.size).toInt private var noLocality = true // if true if no preferredLocations exists for parent RDD From 7a2a33e32dede41937570ec77cf1dfad070e963f Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Wed, 14 Aug 2013 19:40:24 -0700 Subject: [PATCH 084/136] Large scale load and locality tests for the coalesced partitions added --- .../main/scala/spark/rdd/CoalescedRDD.scala | 143 +++++++++++------- core/src/test/scala/spark/RDDSuite.scala | 38 +++-- 2 files changed, 118 insertions(+), 63 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index f999e9b0ec..61c4d0c004 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -36,7 +36,21 @@ private[spark] case class CoalescedRDDPartition( oos.defaultWriteObject() } + /** + * Gets the preferred location for this coalesced RDD partition. Most parent indices should prefer this machine. 
+ * @return preferred location + */ def getPreferredLocation = prefLoc + + /** + * Computes how many of the parents partitions have getPreferredLocation as one of their preferredLocations + * @return locality of this coalesced partition between 0 and 1 + */ + def localFraction :Double = { + var loc: Int = 0 + parents.foreach(p => if (rdd.preferredLocations(p).contains(getPreferredLocation)) loc += 1) + if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) + } } /** @@ -55,13 +69,9 @@ class CoalescedRDD[T: ClassManifest]( override def getPartitions: Array[Partition] = { val res = mutable.ArrayBuffer[CoalescedRDDPartition]() - val targetLen = math.min(prev.partitions.length, maxPartitions) val packer = new PartitionCoalescer(maxPartitions, prev, balanceSlack) - packer.setupGroups(targetLen) // setup the groups (bins) and preferred locations - packer.throwBalls() // assign partitions (balls) to each group (bins) - - for ((pg, i) <- packer.groupArr.zipWithIndex) { + for ((pg, i) <- packer.getPartitions.zipWithIndex) { val ids = pg.list.map(_.index).toArray res += new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) } @@ -86,27 +96,68 @@ class CoalescedRDD[T: ClassManifest]( super.clearDependencies() prev = null } + + /** + * Returns the preferred machine for the split. If split is of type CoalescedRDDPartition, then the preferred machine + * will be one which most parent splits prefer too. + * @param split + * @return the machine most preferred by split + */ + override def getPreferredLocations(split: Partition): Seq[String] = { + if (split.isInstanceOf[CoalescedRDDPartition]) + List(split.asInstanceOf[CoalescedRDDPartition].getPreferredLocation) + else + super.getPreferredLocations(split) + } + } class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { + private def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size + private def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = + if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get) + + private val rnd = new scala.util.Random(7919) // keep this class deterministic + + // each element of groupArr represents one coalesced partition + private val groupArr = mutable.ArrayBuffer[PartitionGroup]() + + // hash used to check whether some machine is already in groupArr + private val groupHash = mutable.Map[String, mutable.ListBuffer[PartitionGroup]]() + + // hash used for the first maxPartitions (to avoid duplicates) + private val initialHash = mutable.Map[Partition, Boolean]() + + // determines the tradeoff between load-balancing the partitions sizes and their locality + // e.g. 
balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality + private val slack = (balanceSlack * prev.partitions.size).toInt + + private var noLocality = true // if true if no preferredLocations exists for parent RDD + + this.setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) and preferred locations + this.throwBalls() // assign partitions (balls) to each group (bins) + + def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + // this class just keeps iterating and rotating infinitely over the partitions of the RDD // next() returns the next preferred machine that a partition is replicated on // the rotator first goes through the first replica copy of each partition, then second, then third - private class RotateLocations(prev: RDD[_]) extends Iterator[String] { + // the iterators return type is a tuple: (replicaString, partition) + private class RotateLocations(prev: RDD[_]) extends Iterator[(String, Partition)] { - private var it: Iterator[String] = resetIterator() + private var it: Iterator[(String, Partition)] = resetIterator() override val isEmpty = !it.hasNext // initializes/resets to start iterating from the beginning private def resetIterator() = { val i1 = prev.partitions.view.map( (p: Partition) => - { if (prev.preferredLocations(p).length > 0) Some(prev.preferredLocations(p)(0)) else None } ) + { if (prev.preferredLocations(p).length > 0) Some((prev.preferredLocations(p)(0),p)) else None } ) val i2 = prev.partitions.view.map( (p: Partition) => - { if (prev.preferredLocations(p).length > 1) Some(prev.preferredLocations(p)(1)) else None } ) + { if (prev.preferredLocations(p).length > 1) Some((prev.preferredLocations(p)(1),p)) else None } ) val i3 = prev.partitions.view.map( (p: Partition) => - { if (prev.preferredLocations(p).length > 2) Some(prev.preferredLocations(p)(2)) else None } ) + { if (prev.preferredLocations(p).length > 2) Some((prev.preferredLocations(p)(2),p)) else None } ) val res = List(i1,i2,i3) res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) into one iterator } @@ -115,7 +166,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) def hasNext(): Boolean = !isEmpty // return the next preferredLocation of some partition of the RDD - def next(): String = { + def next(): (String, Partition) = { if (it.hasNext) it.next() else { @@ -126,42 +177,11 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) } case class PartitionGroup(prefLoc: String = "") { - var list = mutable.MutableList[Partition]() + var list = mutable.ListBuffer[Partition]() def size = list.size - - // returns number of partitions that got locality in this group - def local = { - var loc: Int = 0 - list.foreach(p => if (prev.preferredLocations(p).contains(prefLoc)) loc += 1) - loc - } - - override def toString(): String = { - val localityStr = if (size == 0) "0" else (local*100. 
/ size).toInt.toString - "PartitionGroup(\"" + prefLoc + "\") size: " + size + " locality: " + localityStr +"% \n" - // list.map("\t\t" + _.toString).mkString("\n") + "\n" - } } - def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size - def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = - if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get) - - val rnd = new scala.util.Random(7919) // keep this class deterministic - - // each element of groupArr represents one coalesced partition - val groupArr = mutable.ArrayBuffer[PartitionGroup]() - - // hash used to check whether some machine is already in groupArr - val groupHash = mutable.Map[String, mutable.MutableList[PartitionGroup]]() - - // determines the tradeoff between load-balancing the partitions sizes and their locality - // e.g. balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality - val slack = (balanceSlack * prev.partitions.size).toInt - - private var noLocality = true // if true if no preferredLocations exists for parent RDD - /** * Sorts and gets the least element of the list associated with key in groupHash * The returned PartitionGroup is the least loaded of all groups that represent the machine "key" @@ -172,13 +192,21 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) groupHash.get(key).map(_.sortWith(compare).head) } + def addPartToPGroup(part : Partition, pgroup : PartitionGroup) : Boolean = { + if (!initialHash.contains(part)) { + pgroup.list += part // already preassign this element, ensures every bucket will have 1 element + initialHash += (part -> true) // needed to avoid assigning partitions to multiple buckets/groups + true + } else false + } + /** * Initializes targetLen partition groups and assigns a preferredLocation * This uses coupon collector to estimate how many preferredLocations it must rotate through until it has seen * most of the preferred locations (2 * n log(n)) * @param targetLen */ - def setupGroups(targetLen: Int) { + private def setupGroups(targetLen: Int) { val rotIt = new RotateLocations(prev) // deal with empty rotator case, just create targetLen partition groups with no preferred location @@ -199,22 +227,29 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) // likely targetLen >> number of preferred locations (more buckets than there are machines) while (numCreated < targetLen && tries < expectedCoupons2) { tries += 1 - val nxt = rotIt.next() - if (!groupHash.contains(nxt)) { - val pgroup = PartitionGroup(nxt) + val (nxt_replica, nxt_part) = rotIt.next() + if (!groupHash.contains(nxt_replica)) { + val pgroup = PartitionGroup(nxt_replica) groupArr += pgroup - groupHash += (nxt -> (mutable.MutableList(pgroup))) // use list in case we have multiple groups for same machine + addPartToPGroup(nxt_part, pgroup) + groupHash += (nxt_replica -> (mutable.ListBuffer(pgroup))) // list in case we have multiple groups per machine numCreated += 1 } } while (numCreated < targetLen) { // if we don't have enough partition groups, just create duplicates - val nxt = rotIt.next() - val pgroup = PartitionGroup(nxt) + var (nxt_replica, nxt_part) = rotIt.next() + val pgroup = PartitionGroup(nxt_replica) groupArr += pgroup - groupHash.get(nxt).get += pgroup + groupHash.get(nxt_replica).get += pgroup + var tries = 0 + while (!addPartToPGroup(nxt_part, pgroup) && tries < targetLen) { // ensure each group has at least one partition + nxt_part = rotIt.next()._2 + 
tries += 1 + } numCreated += 1 } + } /** @@ -224,7 +259,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) * @param p partition (ball to be thrown) * @return partition group (bin to be put in) */ - def pickBin(p: Partition): PartitionGroup = { + private def pickBin(p: Partition): PartitionGroup = { val pref = prev.preferredLocations(p).map(getLeastGroupHash(_)).sortWith(compare) // least loaded bin of replicas val prefPart = if (pref == Nil) None else pref.head @@ -243,7 +278,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) } } - def throwBalls() { + private def throwBalls() { if (noLocality) { // no preferredLocations in parent RDD, no randomization needed if (maxPartitions > groupArr.size) { // just return prev.partitions for ((p,i) <- prev.partitions.zipWithIndex) { @@ -257,7 +292,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) } } } else { - for (p <- prev.partitions) { // throw every partition (ball) into a partition group (bin) + for (p <- prev.partitions if (!initialHash.contains(p))) { // throw every partition into a partition group pickBin(p).list += p } } diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 881bdedfe5..c200bfe909 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -22,7 +22,8 @@ import org.scalatest.FunSuite import org.scalatest.concurrent.Timeouts._ import org.scalatest.time.{Span, Millis} import spark.SparkContext._ -import spark.rdd.{CoalescedRDD, CoGroupedRDD, EmptyRDD, PartitionPruningRDD, ShuffledRDD} +import spark.rdd._ +import scala.collection.parallel.mutable class RDDSuite extends FunSuite with SharedSparkContext { @@ -184,16 +185,35 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(splits.foldLeft(true)( (x,y) => if (!x) false else y.length >= 2) === true) // descent balance (2+ per bin) - val prefs = List(List("m1","m2","m3"), List("m4","m5","m6")) - val data2 = sc.makeRDD((1 to 100).map( i => (i, prefs(i % 2) ))) // alternate machine prefs - val coalesced2 = data2.coalesce(10) - val splits2 = coalesced2.glom().collect().map(_.toList).toList + // If we try to coalesce into more partitions than the original RDD, it should just + // keep the original number of partitions. + val coalesced4 = data.coalesce(20) + assert(coalesced4.glom().collect().map(_.toList).toList.sortWith( + (x, y) => if (x.isEmpty) false else if (y.isEmpty) true else x(0) < y(0)) === (1 to 9).map(x => List(x)).toList) - // this gives a list of pairs, each pair is of the form (even,odd), where even is the number of even elements... 
- val list = splits2.map( ls => ls.foldLeft((0,0))( (x,y) => if (y % 2 == 0) (x._1+1,x._2) else (x._1,x._2+1)) ) - val maxes = list.map( { case (a,b) => if (a>b) a else b } ) // get the maxs, this represents the locality - maxes.foreach( locality => assert( locality > 7) ) // at least 70% locality in each partition + // large scale experiment + import collection.mutable + val rnd = scala.util.Random + val partitions = 10000 + val numMachines = 50 + val machines = mutable.ListBuffer[String]() + (1 to numMachines).foreach(machines += "m"+_) + + val blocks = (1 to partitions).map( i => (i, (i to (i+2)).map{ j => machines(rnd.nextInt(machines.size)) } )) + + val data2 = sc.makeRDD(blocks) + val coalesced2 = data2.coalesce(numMachines*2) + + // test that you get over 95% locality in each group + val minLocality = coalesced2.partitions.map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) + .foldLeft(100.)( (perc, loc) => math.min(perc,loc) ) + assert(minLocality > 0.95) + + // test that the groups are load balanced with 100 +/- 15 elements in each + val maxImbalance = coalesced2.partitions.map( part => part.asInstanceOf[CoalescedRDDPartition].parents.size ) + .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev)) + assert(maxImbalance < 15) } test("zipped RDDs") { From c4d59910b149b8b9bbf729f38e3eef3fb64fc85b Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Wed, 14 Aug 2013 19:59:21 -0700 Subject: [PATCH 085/136] added goals inline as comment --- .../main/scala/spark/rdd/CoalescedRDD.scala | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 61c4d0c004..6af55cd80c 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -60,6 +60,27 @@ private[spark] case class CoalescedRDDPartition( * * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, * or to avoid having a large number of small tasks when processing a directory with many files. + * + * If there is no locality information (no preferredLocations) in the parent RDD, then the coalescing + * is very simple: chunk parents that are close in the Array in chunks. + * If there is locality information, it proceeds to pack them with the following three goals in mind: + * + * (1) Balance the groups so they roughly have the same number of parent partitions + * (2) Achieve locality per partition, i.e. there exists one machine which most parent partitions prefer + * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (underlying problem is likely NP-hard) + * (4) Balance preferred machines, i.e. avoid as much as possible picking the same preferred machine + * + * Furthermore, it is assumed that the parent RDD may have many partitions, e.g. 100 000. + * We assume the final number of desired partitions is small, e.g. less than 1000. + * + * The algorithm tries to assign unique preferred machines to each partition. If the number of desired + * partitions is greater than the number of preferred machines (can happen), it needs to start picking + * duplicate preferred machines. This is determined using coupon collector estimation (2n log(n)). + * The load balancing is done using power-of-two randomized bins-balls with one twist: it tries to + * also achieve locality. This is done by allowing a slack (balanceSlack) between two bins. 
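A compact, self-contained sketch of that power-of-two-choices-with-slack rule; the names Bin and pickBinSketch are invented here and are not the PartitionCoalescer API, but the guard applies the same size test that pickBin above applies to PartitionGroup sizes.

    import scala.util.Random

    // Invented stand-in: each bin tracks how many partitions it holds and its host.
    case class Bin(host: String, var size: Int = 0)

    def pickBinSketch(bins: Array[Bin], preferredHost: String, slack: Int, rnd: Random): Bin = {
      val a = bins(rnd.nextInt(bins.length))
      val b = bins(rnd.nextInt(bins.length))
      val lighter = if (a.size < b.size) a else b                          // power-of-two choice
      val home = bins.filter(_.host == preferredHost).sortBy(_.size).headOption
      home match {
        case Some(h) if lighter.size + slack > h.size => h                 // within the slack: locality wins
        case _ => lighter                                                  // too imbalanced: balance wins
      }
    }

In user code this all sits behind rdd.coalesce(n), which constructs a CoalescedRDD with the default balanceSlack from the constructor above.
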
+ * If two bins are within the slack in terms of balance, the algorithm will assign partitions + * according to locality. (contact alig for questions) + * */ class CoalescedRDD[T: ClassManifest]( @transient var prev: RDD[T], From 937f72feb86b406056bd163ce7320f582b70ab16 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Wed, 14 Aug 2013 20:21:53 -0700 Subject: [PATCH 086/136] word wrap before 100 chars per line --- .../main/scala/spark/rdd/CoalescedRDD.scala | 75 ++++++++++--------- core/src/test/scala/spark/RDDSuite.scala | 17 +++-- 2 files changed, 51 insertions(+), 41 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 6af55cd80c..beed2d5b69 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -37,13 +37,15 @@ private[spark] case class CoalescedRDDPartition( } /** - * Gets the preferred location for this coalesced RDD partition. Most parent indices should prefer this machine. + * Gets the preferred location for this coalesced RDD partition. + * Most parent indices should prefer this machine. * @return preferred location */ def getPreferredLocation = prefLoc /** - * Computes how many of the parents partitions have getPreferredLocation as one of their preferredLocations + * Computes how many of the parents partitions have getPreferredLocation + * as one of their preferredLocations * @return locality of this coalesced partition between 0 and 1 */ def localFraction :Double = { @@ -61,24 +63,24 @@ private[spark] case class CoalescedRDDPartition( * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, * or to avoid having a large number of small tasks when processing a directory with many files. * - * If there is no locality information (no preferredLocations) in the parent RDD, then the coalescing + * If there is no locality information (no preferredLocations) in the parent, then the coalescing * is very simple: chunk parents that are close in the Array in chunks. - * If there is locality information, it proceeds to pack them with the following three goals in mind: + * If there is locality information, it proceeds to pack them with the following three goals: * * (1) Balance the groups so they roughly have the same number of parent partitions - * (2) Achieve locality per partition, i.e. there exists one machine which most parent partitions prefer - * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (underlying problem is likely NP-hard) + * (2) Achieve locality per partition, i.e. there exists one machine which most parent splits prefer + * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (problem is likely NP-hard) * (4) Balance preferred machines, i.e. avoid as much as possible picking the same preferred machine * * Furthermore, it is assumed that the parent RDD may have many partitions, e.g. 100 000. * We assume the final number of desired partitions is small, e.g. less than 1000. * - * The algorithm tries to assign unique preferred machines to each partition. If the number of desired - * partitions is greater than the number of preferred machines (can happen), it needs to start picking - * duplicate preferred machines. This is determined using coupon collector estimation (2n log(n)). - * The load balancing is done using power-of-two randomized bins-balls with one twist: it tries to - * also achieve locality. This is done by allowing a slack (balanceSlack) between two bins. 
- * If two bins are within the slack in terms of balance, the algorithm will assign partitions + * The algorithm tries to assign unique preferred machines to each partition. If the number of + * desired partitions is greater than the number of preferred machines (can happen), it needs to + * start picking duplicate preferred machines. This is determined using coupon collector estimation + * (2n log(n)). The load balancing is done using power-of-two randomized bins-balls with one twist: + * it tries to also achieve locality. This is done by allowing a slack (balanceSlack) between two + * bins. If two bins are within the slack in terms of balance, the algorithm will assign partitions * according to locality. (contact alig for questions) * */ @@ -119,8 +121,8 @@ class CoalescedRDD[T: ClassManifest]( } /** - * Returns the preferred machine for the split. If split is of type CoalescedRDDPartition, then the preferred machine - * will be one which most parent splits prefer too. + * Returns the preferred machine for the split. If split is of type CoalescedRDDPartition, + * then the preferred machine will be one which most parent splits prefer too. * @param split * @return the machine most preferred by split */ @@ -157,14 +159,14 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) private var noLocality = true // if true if no preferredLocations exists for parent RDD - this.setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) and preferred locations + this.setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) this.throwBalls() // assign partitions (balls) to each group (bins) def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray // this class just keeps iterating and rotating infinitely over the partitions of the RDD // next() returns the next preferred machine that a partition is replicated on - // the rotator first goes through the first replica copy of each partition, then second, then third + // the rotator first goes through the first replica copy of each partition, then second, third // the iterators return type is a tuple: (replicaString, partition) private class RotateLocations(prev: RDD[_]) extends Iterator[(String, Partition)] { @@ -174,13 +176,16 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) // initializes/resets to start iterating from the beginning private def resetIterator() = { val i1 = prev.partitions.view.map( (p: Partition) => - { if (prev.preferredLocations(p).length > 0) Some((prev.preferredLocations(p)(0),p)) else None } ) + { if (prev.preferredLocations(p).length > 0) + Some((prev.preferredLocations(p)(0),p)) else None } ) val i2 = prev.partitions.view.map( (p: Partition) => - { if (prev.preferredLocations(p).length > 1) Some((prev.preferredLocations(p)(1),p)) else None } ) + { if (prev.preferredLocations(p).length > 1) + Some((prev.preferredLocations(p)(1),p)) else None } ) val i3 = prev.partitions.view.map( (p: Partition) => - { if (prev.preferredLocations(p).length > 2) Some((prev.preferredLocations(p)(2),p)) else None } ) + { if (prev.preferredLocations(p).length > 2) + Some((prev.preferredLocations(p)(2),p)) else None } ) val res = List(i1,i2,i3) - res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) into one iterator + res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) } // hasNext() is false iff there are no 
preferredLocations for any of the partitions of the RDD @@ -215,22 +220,22 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) def addPartToPGroup(part : Partition, pgroup : PartitionGroup) : Boolean = { if (!initialHash.contains(part)) { - pgroup.list += part // already preassign this element, ensures every bucket will have 1 element - initialHash += (part -> true) // needed to avoid assigning partitions to multiple buckets/groups + pgroup.list += part // already assign this element + initialHash += (part -> true) // needed to avoid assigning partitions to multiple buckets true } else false } /** * Initializes targetLen partition groups and assigns a preferredLocation - * This uses coupon collector to estimate how many preferredLocations it must rotate through until it has seen - * most of the preferred locations (2 * n log(n)) + * This uses coupon collector to estimate how many preferredLocations it must rotate through + * until it has seen most of the preferred locations (2 * n log(n)) * @param targetLen */ private def setupGroups(targetLen: Int) { val rotIt = new RotateLocations(prev) - // deal with empty rotator case, just create targetLen partition groups with no preferred location + // deal with empty case, just create targetLen partition groups with no preferred location if (!rotIt.hasNext()) { (1 to targetLen).foreach(x => groupArr += PartitionGroup()) return @@ -243,9 +248,9 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) var numCreated = 0 var tries = 0 - // rotate through until either targetLen unique/distinct preferred locations have been created OR - // we've rotated expectedCoupons2, in which case we have likely seen all preferred locations, i.e. - // likely targetLen >> number of preferred locations (more buckets than there are machines) + // rotate through until either targetLen unique/distinct preferred locations have been created + // OR we've rotated expectedCoupons2, in which case we have likely seen all preferred locations, + // i.e. 
likely targetLen >> number of preferred locations (more buckets than there are machines) while (numCreated < targetLen && tries < expectedCoupons2) { tries += 1 val (nxt_replica, nxt_part) = rotIt.next() @@ -253,18 +258,18 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) val pgroup = PartitionGroup(nxt_replica) groupArr += pgroup addPartToPGroup(nxt_part, pgroup) - groupHash += (nxt_replica -> (mutable.ListBuffer(pgroup))) // list in case we have multiple groups per machine + groupHash += (nxt_replica -> (mutable.ListBuffer(pgroup))) // list in case we have multiple numCreated += 1 } } - while (numCreated < targetLen) { // if we don't have enough partition groups, just create duplicates + while (numCreated < targetLen) { // if we don't have enough partition groups, create duplicates var (nxt_replica, nxt_part) = rotIt.next() val pgroup = PartitionGroup(nxt_replica) groupArr += pgroup groupHash.get(nxt_replica).get += pgroup var tries = 0 - while (!addPartToPGroup(nxt_part, pgroup) && tries < targetLen) { // ensure each group has at least one partition + while (!addPartToPGroup(nxt_part, pgroup) && tries < targetLen) { // ensure at least one part nxt_part = rotIt.next()._2 tries += 1 } @@ -281,12 +286,12 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) * @return partition group (bin to be put in) */ private def pickBin(p: Partition): PartitionGroup = { - val pref = prev.preferredLocations(p).map(getLeastGroupHash(_)).sortWith(compare) // least loaded bin of replicas + val pref = prev.preferredLocations(p).map(getLeastGroupHash(_)).sortWith(compare) // least load val prefPart = if (pref == Nil) None else pref.head val r1 = rnd.nextInt(groupArr.size) val r2 = rnd.nextInt(groupArr.size) - val minPowerOfTwo = if (groupArr(r1).size < groupArr(r2).size) groupArr(r1) else groupArr(r2) // power of 2 + val minPowerOfTwo = if (groupArr(r1).size < groupArr(r2).size) groupArr(r1) else groupArr(r2) if (prefPart== None) // if no preferred locations, just use basic power of two return minPowerOfTwo @@ -305,7 +310,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) for ((p,i) <- prev.partitions.zipWithIndex) { groupArr(i).list += p } - } else { // old code, just splits them grouping partitions that are next to each other in the array + } else { // old code, just splits them grouping partitions that are next to each other (0 until maxPartitions).foreach { i => val rangeStart = ((i.toLong * prev.partitions.length) / maxPartitions).toInt val rangeEnd = (((i.toLong + 1) * prev.partitions.length) / maxPartitions).toInt @@ -313,7 +318,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) } } } else { - for (p <- prev.partitions if (!initialHash.contains(p))) { // throw every partition into a partition group + for (p <- prev.partitions if (!initialHash.contains(p))) { // throw every partition into group pickBin(p).list += p } } diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index c200bfe909..a757aebd65 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -178,18 +178,20 @@ class RDDSuite extends FunSuite with SharedSparkContext { // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5 val data = sc.makeRDD((1 to 9).map( i => (i, (i to (i+2)).map{ j => "m" + (j%6)} ))) val coalesced1 = data.coalesce(3) - assert(coalesced1.collect().toList.sorted === (1 
to 9).toList) // no data lost (NB: order might reshuffle) + assert(coalesced1.collect().toList.sorted === (1 to 9).toList) // no data lost val splits = coalesced1.glom().collect().map(_.toList).toList assert(splits.length === 3) // ensure it indeed created 3 partitions - assert(splits.foldLeft(true)( (x,y) => if (!x) false else y.length >= 2) === true) // descent balance (2+ per bin) + assert(splits.foldLeft(true) + ( (x,y) => if (!x) false else y.length >= 2) === true) // (2+ balance) // If we try to coalesce into more partitions than the original RDD, it should just // keep the original number of partitions. val coalesced4 = data.coalesce(20) assert(coalesced4.glom().collect().map(_.toList).toList.sortWith( - (x, y) => if (x.isEmpty) false else if (y.isEmpty) true else x(0) < y(0)) === (1 to 9).map(x => List(x)).toList) + (x, y) => if (x.isEmpty) false else if (y.isEmpty) true else x(0) < y(0)) === (1 to 9). + map(x => List(x)).toList) // large scale experiment @@ -200,18 +202,21 @@ class RDDSuite extends FunSuite with SharedSparkContext { val machines = mutable.ListBuffer[String]() (1 to numMachines).foreach(machines += "m"+_) - val blocks = (1 to partitions).map( i => (i, (i to (i+2)).map{ j => machines(rnd.nextInt(machines.size)) } )) + val blocks = (1 to partitions).map( i => (i, (i to (i+2)) + .map{ j => machines(rnd.nextInt(machines.size)) } )) val data2 = sc.makeRDD(blocks) val coalesced2 = data2.coalesce(numMachines*2) // test that you get over 95% locality in each group - val minLocality = coalesced2.partitions.map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) + val minLocality = coalesced2.partitions + .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) .foldLeft(100.)( (perc, loc) => math.min(perc,loc) ) assert(minLocality > 0.95) // test that the groups are load balanced with 100 +/- 15 elements in each - val maxImbalance = coalesced2.partitions.map( part => part.asInstanceOf[CoalescedRDDPartition].parents.size ) + val maxImbalance = coalesced2.partitions + .map( part => part.asInstanceOf[CoalescedRDDPartition].parents.size ) .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev)) assert(maxImbalance < 15) } From f6e47e8b51c55a664ce062dbe8a320591644ad62 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Thu, 15 Aug 2013 10:58:39 -0700 Subject: [PATCH 087/136] Renamed split to partition --- .../main/scala/spark/rdd/CoalescedRDD.scala | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index beed2d5b69..f46dd1ee6c 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -31,7 +31,7 @@ private[spark] case class CoalescedRDDPartition( @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream) { - // Update the reference to parent split at the time of task serialization + // Update the reference to parent partition at the time of task serialization parents = parentsIndices.map(rdd.partitions(_)) oos.defaultWriteObject() } @@ -68,7 +68,7 @@ private[spark] case class CoalescedRDDPartition( * If there is locality information, it proceeds to pack them with the following three goals: * * (1) Balance the groups so they roughly have the same number of parent partitions - * (2) Achieve locality per partition, i.e. there exists one machine which most parent splits prefer + * (2) Achieve locality per partition, i.e. 
find one machine which most parent partitions prefer * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (problem is likely NP-hard) * (4) Balance preferred machines, i.e. avoid as much as possible picking the same preferred machine * @@ -102,9 +102,9 @@ class CoalescedRDD[T: ClassManifest]( res.toArray } - override def compute(split: Partition, context: TaskContext): Iterator[T] = { - split.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentSplit => - firstParent[T].iterator(parentSplit, context) + override def compute(partition: Partition, context: TaskContext): Iterator[T] = { + partition.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentPartition => + firstParent[T].iterator(parentPartition, context) } } @@ -121,16 +121,16 @@ class CoalescedRDD[T: ClassManifest]( } /** - * Returns the preferred machine for the split. If split is of type CoalescedRDDPartition, + * Returns the preferred machine for the partition. If split is of type CoalescedRDDPartition, * then the preferred machine will be one which most parent splits prefer too. - * @param split + * @param partition * @return the machine most preferred by split */ - override def getPreferredLocations(split: Partition): Seq[String] = { - if (split.isInstanceOf[CoalescedRDDPartition]) - List(split.asInstanceOf[CoalescedRDDPartition].getPreferredLocation) + override def getPreferredLocations(partition: Partition): Seq[String] = { + if (partition.isInstanceOf[CoalescedRDDPartition]) + List(partition.asInstanceOf[CoalescedRDDPartition].getPreferredLocation) else - super.getPreferredLocations(split) + super.getPreferredLocations(partition) } } From f24861b60a9ddef1369a0a6816f6922575940656 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Thu, 15 Aug 2013 11:00:54 -0700 Subject: [PATCH 088/136] Fix bug in tests --- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 2 +- core/src/test/scala/spark/RDDSuite.scala | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index f46dd1ee6c..205175edfc 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -310,7 +310,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) for ((p,i) <- prev.partitions.zipWithIndex) { groupArr(i).list += p } - } else { // old code, just splits them grouping partitions that are next to each other + } else { // no locality available, then simply split partitions based on positions in array (0 until maxPartitions).foreach { i => val rangeStart = ((i.toLong * prev.partitions.length) / maxPartitions).toInt val rangeEnd = (((i.toLong + 1) * prev.partitions.length) / maxPartitions).toInt diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index a757aebd65..8b6fa9c81e 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -184,7 +184,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(splits.length === 3) // ensure it indeed created 3 partitions assert(splits.foldLeft(true) - ( (x,y) => if (!x) false else y.length >= 2) === true) // (2+ balance) + ( (x,y) => if (!x) false else y.length >= 1) === true) // (1+ balance) // If we try to coalesce into more partitions than the original RDD, it should just // keep the original number of partitions. 
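That behavior is easy to sanity-check interactively; a hypothetical spark-shell fragment, not part of the patch (assumes `sc` is a SparkContext):

    val nine = sc.parallelize(1 to 9, 9)
    nine.coalesce(20).partitions.length   // still 9: a CoalescedRDD never increases the partition count
    nine.coalesce(3).partitions.length    // 3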
@@ -208,17 +208,17 @@ class RDDSuite extends FunSuite with SharedSparkContext { val data2 = sc.makeRDD(blocks) val coalesced2 = data2.coalesce(numMachines*2) - // test that you get over 95% locality in each group + // test that you get over 90% locality in each group val minLocality = coalesced2.partitions .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) .foldLeft(100.)( (perc, loc) => math.min(perc,loc) ) - assert(minLocality > 0.95) + assert(minLocality > 0.90) - // test that the groups are load balanced with 100 +/- 15 elements in each + // test that the groups are load balanced with 100 +/- 20 elements in each val maxImbalance = coalesced2.partitions .map( part => part.asInstanceOf[CoalescedRDDPartition].parents.size ) .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev)) - assert(maxImbalance < 15) + assert(maxImbalance < 20) } test("zipped RDDs") { From f67753cdfcb08b3786320c45af98e097a8ed5d39 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Thu, 15 Aug 2013 11:03:42 -0700 Subject: [PATCH 089/136] made preferredLocation a val of the surrounding case class --- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 205175edfc..bfd0cf257e 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -25,7 +25,7 @@ private[spark] case class CoalescedRDDPartition( index: Int, @transient rdd: RDD[_], parentsIndices: Array[Int], - prefLoc: String = "" + @transient preferredLocation: String = "" ) extends Partition { var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_)) @@ -36,13 +36,6 @@ private[spark] case class CoalescedRDDPartition( oos.defaultWriteObject() } - /** - * Gets the preferred location for this coalesced RDD partition. - * Most parent indices should prefer this machine. 
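As a side note on localFraction, the measure it computes can be written as a small pure function over preferred-location lists; this is an illustrative rewrite, not the patch's code:

    // Illustrative only: fraction of parents whose preferred locations include the chosen machine.
    // E.g. parents preferring Seq("m1", "m2"), Seq("m1"), Seq("m3") with the group pinned to "m1"
    // gives 2 matches out of 3, i.e. about 0.67.
    def localFraction(parentPrefs: Seq[Seq[String]], preferredLocation: String): Double =
      if (parentPrefs.isEmpty) 0.0
      else parentPrefs.count(_.contains(preferredLocation)).toDouble / parentPrefs.size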
- * @return preferred location - */ - def getPreferredLocation = prefLoc - /** * Computes how many of the parents partitions have getPreferredLocation * as one of their preferredLocations @@ -50,7 +43,7 @@ private[spark] case class CoalescedRDDPartition( */ def localFraction :Double = { var loc: Int = 0 - parents.foreach(p => if (rdd.preferredLocations(p).contains(getPreferredLocation)) loc += 1) + parents.foreach(p => if (rdd.preferredLocations(p).contains(preferredLocation)) loc += 1) if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) } } @@ -128,7 +121,7 @@ class CoalescedRDD[T: ClassManifest]( */ override def getPreferredLocations(partition: Partition): Seq[String] = { if (partition.isInstanceOf[CoalescedRDDPartition]) - List(partition.asInstanceOf[CoalescedRDDPartition].getPreferredLocation) + List(partition.asInstanceOf[CoalescedRDDPartition].preferredLocation) else super.getPreferredLocations(partition) } From 4f99be1ffd2f5237566b143b317e3677b50d06e8 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Thu, 15 Aug 2013 11:04:38 -0700 Subject: [PATCH 090/136] use count rather than foreach --- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index bfd0cf257e..450d8b33a9 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -42,8 +42,7 @@ private[spark] case class CoalescedRDDPartition( * @return locality of this coalesced partition between 0 and 1 */ def localFraction :Double = { - var loc: Int = 0 - parents.foreach(p => if (rdd.preferredLocations(p).contains(preferredLocation)) loc += 1) + val loc = parents.count(p => rdd.preferredLocations(p).contains(preferredLocation)) if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) } } From 02d6464f2f51217ab7435ef594f003943a742bbf Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Thu, 15 Aug 2013 11:12:08 -0700 Subject: [PATCH 091/136] space removed --- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 450d8b33a9..56f350b1f5 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -79,7 +79,7 @@ private[spark] case class CoalescedRDDPartition( class CoalescedRDD[T: ClassManifest]( @transient var prev: RDD[T], maxPartitions: Int, - balanceSlack: Double = 0.10 ) + balanceSlack: Double = 0.10) extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { From 339598c0806d01e3d43cd49dd02e9d510b5f586b Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Thu, 15 Aug 2013 12:05:20 -0700 Subject: [PATCH 092/136] several of Reynold's suggestions implemented --- .../main/scala/spark/rdd/CoalescedRDD.scala | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 56f350b1f5..8d06b4ceb8 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -83,15 +83,13 @@ class CoalescedRDD[T: ClassManifest]( extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies override def getPartitions: 
Array[Partition] = { - val res = mutable.ArrayBuffer[CoalescedRDDPartition]() val packer = new PartitionCoalescer(maxPartitions, prev, balanceSlack) - for ((pg, i) <- packer.getPartitions.zipWithIndex) { - val ids = pg.list.map(_.index).toArray - res += new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) + packer.getPartitions.zipWithIndex.map { + case (pg, i) => + val ids = pg.list.map(_.index).toArray + new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) } - - res.toArray } override def compute(partition: Partition, context: TaskContext): Iterator[T] = { @@ -119,10 +117,11 @@ class CoalescedRDD[T: ClassManifest]( * @return the machine most preferred by split */ override def getPreferredLocations(partition: Partition): Seq[String] = { - if (partition.isInstanceOf[CoalescedRDDPartition]) + if (partition.isInstanceOf[CoalescedRDDPartition]) { List(partition.asInstanceOf[CoalescedRDDPartition].preferredLocation) - else + } else { super.getPreferredLocations(partition) + } } } @@ -167,15 +166,15 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) // initializes/resets to start iterating from the beginning private def resetIterator() = { - val i1 = prev.partitions.view.map( (p: Partition) => + val i1 = prev.partitions.view.map{ p: Partition => { if (prev.preferredLocations(p).length > 0) - Some((prev.preferredLocations(p)(0),p)) else None } ) - val i2 = prev.partitions.view.map( (p: Partition) => + Some((prev.preferredLocations(p)(0),p)) else None } } + val i2 = prev.partitions.view.map{ p: Partition => { if (prev.preferredLocations(p).length > 1) - Some((prev.preferredLocations(p)(1),p)) else None } ) - val i3 = prev.partitions.view.map( (p: Partition) => + Some((prev.preferredLocations(p)(1),p)) else None } } + val i3 = prev.partitions.view.map{ p: Partition => { if (prev.preferredLocations(p).length > 2) - Some((prev.preferredLocations(p)(2),p)) else None } ) + Some((prev.preferredLocations(p)(2),p)) else None } } val res = List(i1,i2,i3) res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) } @@ -215,7 +214,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) pgroup.list += part // already assign this element initialHash += (part -> true) // needed to avoid assigning partitions to multiple buckets true - } else false + } else { false } } /** From 35537e6341dad72366a7ba6d92d6de9c710542ac Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Thu, 15 Aug 2013 12:52:13 -0700 Subject: [PATCH 093/136] Made a function object that returns the coalesced groups --- .../main/scala/spark/rdd/CoalescedRDD.scala | 65 ++++++++++--------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 8d06b4ceb8..1cfa404fd8 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -83,9 +83,9 @@ class CoalescedRDD[T: ClassManifest]( extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { - val packer = new PartitionCoalescer(maxPartitions, prev, balanceSlack) + val groupList = coalescePartitions(maxPartitions, prev, balanceSlack) - packer.getPartitions.zipWithIndex.map { + groupList.zipWithIndex.map { case (pg, i) => val ids = pg.list.map(_.index).toArray new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) @@ -126,46 +126,40 @@ class CoalescedRDD[T: ClassManifest]( } +class 
coalescePartitions protected (maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { -class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { - - private def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size - private def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = + protected def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size + protected def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get) - private val rnd = new scala.util.Random(7919) // keep this class deterministic + protected val rnd = new scala.util.Random(7919) // keep this class deterministic // each element of groupArr represents one coalesced partition - private val groupArr = mutable.ArrayBuffer[PartitionGroup]() + protected val groupArr = mutable.ArrayBuffer[PartitionGroup]() // hash used to check whether some machine is already in groupArr - private val groupHash = mutable.Map[String, mutable.ListBuffer[PartitionGroup]]() + protected val groupHash = mutable.Map[String, mutable.ListBuffer[PartitionGroup]]() // hash used for the first maxPartitions (to avoid duplicates) - private val initialHash = mutable.Map[Partition, Boolean]() + protected val initialHash = mutable.Map[Partition, Boolean]() // determines the tradeoff between load-balancing the partitions sizes and their locality // e.g. balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality - private val slack = (balanceSlack * prev.partitions.size).toInt + protected val slack = (balanceSlack * prev.partitions.size).toInt - private var noLocality = true // if true if no preferredLocations exists for parent RDD - - this.setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) - this.throwBalls() // assign partitions (balls) to each group (bins) - - def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + protected var noLocality = true // if true if no preferredLocations exists for parent RDD // this class just keeps iterating and rotating infinitely over the partitions of the RDD // next() returns the next preferred machine that a partition is replicated on // the rotator first goes through the first replica copy of each partition, then second, third // the iterators return type is a tuple: (replicaString, partition) - private class RotateLocations(prev: RDD[_]) extends Iterator[(String, Partition)] { + protected class RotateLocations(prev: RDD[_]) extends Iterator[(String, Partition)] { - private var it: Iterator[(String, Partition)] = resetIterator() + protected var it: Iterator[(String, Partition)] = resetIterator() override val isEmpty = !it.hasNext // initializes/resets to start iterating from the beginning - private def resetIterator() = { + protected def resetIterator() = { val i1 = prev.partitions.view.map{ p: Partition => { if (prev.preferredLocations(p).length > 0) Some((prev.preferredLocations(p)(0),p)) else None } } @@ -193,19 +187,13 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) } } - case class PartitionGroup(prefLoc: String = "") { - var list = mutable.ListBuffer[Partition]() - - def size = list.size - } - /** * Sorts and gets the least element of the list associated with key in groupHash * The returned PartitionGroup is the least loaded of all groups that represent the machine "key" * @param key string representing a 
partitioned group on preferred machine key * @return Option of PartitionGroup that has least elements for key */ - private def getLeastGroupHash(key: String): Option[PartitionGroup] = { + protected def getLeastGroupHash(key: String): Option[PartitionGroup] = { groupHash.get(key).map(_.sortWith(compare).head) } @@ -223,7 +211,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) * until it has seen most of the preferred locations (2 * n log(n)) * @param targetLen */ - private def setupGroups(targetLen: Int) { + protected def setupGroups(targetLen: Int) { val rotIt = new RotateLocations(prev) // deal with empty case, just create targetLen partition groups with no preferred location @@ -276,7 +264,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) * @param p partition (ball to be thrown) * @return partition group (bin to be put in) */ - private def pickBin(p: Partition): PartitionGroup = { + protected def pickBin(p: Partition): PartitionGroup = { val pref = prev.preferredLocations(p).map(getLeastGroupHash(_)).sortWith(compare) // least load val prefPart = if (pref == Nil) None else pref.head @@ -295,7 +283,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) } } - private def throwBalls() { + protected def throwBalls() { if (noLocality) { // no preferredLocations in parent RDD, no randomization needed if (maxPartitions > groupArr.size) { // just return prev.partitions for ((p,i) <- prev.partitions.zipWithIndex) { @@ -315,4 +303,21 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) } } + def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + +} + +case class PartitionGroup(prefLoc: String = "") { + var list = mutable.ListBuffer[Partition]() + + def size = list.size +} + +object coalescePartitions { + def apply(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) : Array[PartitionGroup] = { + val pc = new coalescePartitions(maxPartitions, prev, balanceSlack) + pc.setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) + pc.throwBalls() // assign partitions (balls) to each group (bins) + pc.getPartitions + } } From abcefb3858aac373bd8898c3e998375d5f26b803 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Fri, 16 Aug 2013 11:56:44 -0700 Subject: [PATCH 094/136] fixed matei's comments --- .../main/scala/spark/rdd/CoalescedRDD.scala | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 1cfa404fd8..e6fe8ec8ee 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -41,7 +41,7 @@ private[spark] case class CoalescedRDDPartition( * as one of their preferredLocations * @return locality of this coalesced partition between 0 and 1 */ - def localFraction :Double = { + def localFraction: Double = { val loc = parents.count(p => rdd.preferredLocations(p).contains(preferredLocation)) if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) } @@ -83,9 +83,9 @@ class CoalescedRDD[T: ClassManifest]( extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { - val groupList = coalescePartitions(maxPartitions, prev, balanceSlack) + val pc = new PartitionCoalescer(maxPartitions, prev, balanceSlack) - groupList.zipWithIndex.map { + 
pc.run().zipWithIndex.map { case (pg, i) => val ids = pg.list.map(_.index).toArray new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) @@ -126,7 +126,7 @@ class CoalescedRDD[T: ClassManifest]( } -class coalescePartitions protected (maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { +private[spark] class PartitionCoalescer (maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { protected def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size protected def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = @@ -303,21 +303,22 @@ class coalescePartitions protected (maxPartitions: Int, prev: RDD[_], balanceSla } } - def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + protected def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + /** + * Runs the packing algorithm and returns an array of PartitionGroups that if possible are + * load balanced and grouped by locality + * @return array of partition groups + */ + def run() : Array[PartitionGroup] = { + setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) + throwBalls() // assign partitions (balls) to each group (bins) + getPartitions + } } -case class PartitionGroup(prefLoc: String = "") { +private[spark] case class PartitionGroup(prefLoc: String = "") { var list = mutable.ListBuffer[Partition]() def size = list.size } - -object coalescePartitions { - def apply(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) : Array[PartitionGroup] = { - val pc = new coalescePartitions(maxPartitions, prev, balanceSlack) - pc.setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) - pc.throwBalls() // assign partitions (balls) to each group (bins) - pc.getPartitions - } -} From 33a0f59354197d667a97c600e2bb8fefe50c181b Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Fri, 16 Aug 2013 12:26:03 -0700 Subject: [PATCH 095/136] Added error messages to the tests to make failed tests less cryptic --- core/src/test/scala/spark/RDDSuite.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 8b6fa9c81e..ac406b9447 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -178,20 +178,20 @@ class RDDSuite extends FunSuite with SharedSparkContext { // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5 val data = sc.makeRDD((1 to 9).map( i => (i, (i to (i+2)).map{ j => "m" + (j%6)} ))) val coalesced1 = data.coalesce(3) - assert(coalesced1.collect().toList.sorted === (1 to 9).toList) // no data lost + assert(coalesced1.collect().toList.sorted === (1 to 9).toList, "Data got *lost* in coalescing") val splits = coalesced1.glom().collect().map(_.toList).toList - assert(splits.length === 3) // ensure it indeed created 3 partitions + assert(splits.length == 3, "Supposed to coalesce to 3 but got " + splits.length) assert(splits.foldLeft(true) - ( (x,y) => if (!x) false else y.length >= 1) === true) // (1+ balance) + ( (x,y) => if (!x) false else y.length >= 1) === true, "Some partitions were empty") // If we try to coalesce into more partitions than the original RDD, it should just // keep the original number of partitions. 
val coalesced4 = data.coalesce(20) assert(coalesced4.glom().collect().map(_.toList).toList.sortWith( (x, y) => if (x.isEmpty) false else if (y.isEmpty) true else x(0) < y(0)) === (1 to 9). - map(x => List(x)).toList) + map(x => List(x)).toList, "Tried coalescing 9 partitions to 20 but didn't get 9 back") // large scale experiment @@ -205,20 +205,20 @@ class RDDSuite extends FunSuite with SharedSparkContext { val blocks = (1 to partitions).map( i => (i, (i to (i+2)) .map{ j => machines(rnd.nextInt(machines.size)) } )) - val data2 = sc.makeRDD(blocks) + val data2 = sc.makeRDD(blocks) // .map( i => i*2 ) val coalesced2 = data2.coalesce(numMachines*2) // test that you get over 90% locality in each group val minLocality = coalesced2.partitions .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) .foldLeft(100.)( (perc, loc) => math.min(perc,loc) ) - assert(minLocality > 0.90) + assert(minLocality >= 0.90, "Expected 90% locality but got " + (minLocality*100.).toInt + "%") // test that the groups are load balanced with 100 +/- 20 elements in each val maxImbalance = coalesced2.partitions .map( part => part.asInstanceOf[CoalescedRDDPartition].parents.size ) .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev)) - assert(maxImbalance < 20) + assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance) } test("zipped RDDs") { From 3b5bb8a4ae1ebc0bbfa34c908a99274c343fe883 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Fri, 16 Aug 2013 13:12:11 -0700 Subject: [PATCH 096/136] added one test that will test a future functionality --- core/src/test/scala/spark/RDDSuite.scala | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index ac406b9447..ad91263322 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -211,7 +211,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { // test that you get over 90% locality in each group val minLocality = coalesced2.partitions .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) - .foldLeft(100.)( (perc, loc) => math.min(perc,loc) ) + .foldLeft(1.)( (perc, loc) => math.min(perc,loc) ) assert(minLocality >= 0.90, "Expected 90% locality but got " + (minLocality*100.).toInt + "%") // test that the groups are load balanced with 100 +/- 20 elements in each @@ -219,6 +219,15 @@ class RDDSuite extends FunSuite with SharedSparkContext { .map( part => part.asInstanceOf[CoalescedRDDPartition].parents.size ) .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev)) assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance) + + // TDD: Test for later when we have implemented functionality to get locality from DAGScheduler +// val data3 = sc.makeRDD(blocks).map( i => i*2 ) +// val coalesced3 = data3.coalesce(numMachines*2) +// val minLocality2 = coalesced3.partitions +// .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) +// .foldLeft(1.)( (perc, loc) => math.min(perc,loc) ) +// assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " + +// (minLocality2*100.).toInt + "%") } test("zipped RDDs") { From b69e7166ba76d35d75b98015b0d39a8a004a7436 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Fri, 16 Aug 2013 14:03:45 -0700 Subject: [PATCH 097/136] Coalescer now uses current preferred locations for derived RDDs. 
Made run() in DAGScheduler thread safe and added a method to be able to ask it for preferred locations. Added a similar method that wraps the former inside SparkContext. --- core/src/main/scala/spark/SparkContext.scala | 9 +++++ .../main/scala/spark/rdd/CoalescedRDD.scala | 30 +++++++++----- .../scala/spark/scheduler/DAGScheduler.scala | 40 +++++++++++-------- core/src/test/scala/spark/RDDSuite.scala | 14 +++---- 4 files changed, 59 insertions(+), 34 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index fdd2dfa810..544c971efc 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -614,6 +614,15 @@ class SparkContext( addedFiles.clear() } + /** + * Gets the locality information associated with the partition in a particular rdd + * @param rdd of interest + * @param partition to be looked up for locality + * @return list of preferred locations for the partition + */ + def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = + dagScheduler.getPreferredLocs(rdd, partition) + /** * Adds a JAR dependency for all tasks to be executed on this SparkContext in the future. * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index e6fe8ec8ee..01d4bcadc2 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -17,9 +17,11 @@ package spark.rdd -import spark.{Dependency, OneToOneDependency, NarrowDependency, RDD, Partition, TaskContext} +import spark._ import java.io.{ObjectOutputStream, IOException} import scala.collection.mutable +import scala.Some +import spark.rdd.CoalescedRDDPartition private[spark] case class CoalescedRDDPartition( index: Int, @@ -42,7 +44,8 @@ private[spark] case class CoalescedRDDPartition( * @return locality of this coalesced partition between 0 and 1 */ def localFraction: Double = { - val loc = parents.count(p => rdd.preferredLocations(p).contains(preferredLocation)) + val loc = parents.count(p => rdd.context.getPreferredLocs(rdd, p.index) + .contains(preferredLocation)) if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) } } @@ -126,8 +129,8 @@ class CoalescedRDD[T: ClassManifest]( } -private[spark] class PartitionCoalescer (maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { - +private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], + balanceSlack: Double) { protected def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size protected def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get) @@ -149,6 +152,10 @@ private[spark] class PartitionCoalescer (maxPartitions: Int, prev: RDD[_], balan protected var noLocality = true // if true if no preferredLocations exists for parent RDD + // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones) + protected def currentPreferredLocations(rdd: RDD[_], part: Partition) : Seq[String] = + rdd.context.getPreferredLocs(rdd, part.index) + // this class just keeps iterating and rotating infinitely over the partitions of the RDD // next() returns the next preferred machine that a partition is replicated on // the rotator first goes through the first replica copy of each partition, then second, third 
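The iteration order that the comment above describes can be sketched without any RDD machinery; a toy version over plain location lists, assuming up to three replicas as in the patch (the real iterator also wraps around indefinitely, which this sketch omits):

    // Toy sketch: emit every partition's 1st-replica location, then every 2nd, then every 3rd.
    // locs(i) stands in for the preferred-location list of partition i.
    def rotateOnce(locs: IndexedSeq[Seq[String]]): Iterator[(String, Int)] =
      (0 until 3).iterator.flatMap { replica =>
        locs.iterator.zipWithIndex.collect {
          case (ls, i) if ls.length > replica => (ls(replica), i)
        }
      }

    // rotateOnce(Vector(Seq("m1", "m2"), Seq("m2"), Seq("m3", "m1"))).toList
    // -> List((m1,0), (m2,1), (m3,2), (m2,0), (m1,2))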
@@ -161,14 +168,14 @@ private[spark] class PartitionCoalescer (maxPartitions: Int, prev: RDD[_], balan // initializes/resets to start iterating from the beginning protected def resetIterator() = { val i1 = prev.partitions.view.map{ p: Partition => - { if (prev.preferredLocations(p).length > 0) - Some((prev.preferredLocations(p)(0),p)) else None } } + { if (currentPreferredLocations(prev, p).length > 0) + Some((currentPreferredLocations(prev, p)(0),p)) else None } } val i2 = prev.partitions.view.map{ p: Partition => - { if (prev.preferredLocations(p).length > 1) - Some((prev.preferredLocations(p)(1),p)) else None } } + { if (currentPreferredLocations(prev, p).length > 1) + Some((currentPreferredLocations(prev, p)(1),p)) else None } } val i3 = prev.partitions.view.map{ p: Partition => - { if (prev.preferredLocations(p).length > 2) - Some((prev.preferredLocations(p)(2),p)) else None } } + { if (currentPreferredLocations(prev, p).length > 2) + Some((currentPreferredLocations(prev, p)(2),p)) else None } } val res = List(i1,i2,i3) res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) } @@ -265,7 +272,8 @@ private[spark] class PartitionCoalescer (maxPartitions: Int, prev: RDD[_], balan * @return partition group (bin to be put in) */ protected def pickBin(p: Partition): PartitionGroup = { - val pref = prev.preferredLocations(p).map(getLeastGroupHash(_)).sortWith(compare) // least load + val pref = prev.context.getPreferredLocs(prev, p.index). + map(getLeastGroupHash(_)).sortWith(compare) // least loaded of the pref locations val prefPart = if (pref == Nil) None else pref.head val r1 = rnd.nextInt(groupArr.size) diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/spark/scheduler/DAGScheduler.scala index 9402f18a0f..7275bd346a 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/spark/scheduler/DAGScheduler.scala @@ -435,23 +435,24 @@ class DAGScheduler( if (event != null) { logDebug("Got event of type " + event.getClass.getName) } - - if (event != null) { - if (processEvent(event)) { - return + this.synchronized { // needed in case other threads makes calls into methods of this class + if (event != null) { + if (processEvent(event)) { + return + } } - } - val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability - // Periodically resubmit failed stages if some map output fetches have failed and we have - // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails, - // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at - // the same time, so we want to make sure we've identified all the reduce tasks that depend - // on the failed node. - if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) { - resubmitFailedStages() - } else { - submitWaitingStages() + val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability + // Periodically resubmit failed stages if some map output fetches have failed and we have + // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails, + // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at + // the same time, so we want to make sure we've identified all the reduce tasks that depend + // on the failed node. 
+ if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) { + resubmitFailedStages() + } else { + submitWaitingStages() + } } } } @@ -789,7 +790,14 @@ class DAGScheduler( visitedRdds.contains(target.rdd) } - private def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = { + /** + * Synchronized method that might be called from other threads. + * @param rdd whose partitions are to be looked at + * @param partition to lookup locality information for + * @return list of machines that are preferred by the partition + */ + private[spark] + def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = synchronized { // If the partition is cached, return the cache locations val cached = getCacheLocs(rdd)(partition) if (!cached.isEmpty) { diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index ad91263322..0532435288 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -221,13 +221,13 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance) // TDD: Test for later when we have implemented functionality to get locality from DAGScheduler -// val data3 = sc.makeRDD(blocks).map( i => i*2 ) -// val coalesced3 = data3.coalesce(numMachines*2) -// val minLocality2 = coalesced3.partitions -// .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) -// .foldLeft(1.)( (perc, loc) => math.min(perc,loc) ) -// assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " + -// (minLocality2*100.).toInt + "%") + val data3 = sc.makeRDD(blocks).map( i => i*2 ) + val coalesced3 = data3.coalesce(numMachines*2) + val minLocality2 = coalesced3.partitions + .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) + .foldLeft(1.)( (perc, loc) => math.min(perc,loc) ) + assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " + + (minLocality2*100.).toInt + "%") } test("zipped RDDs") { From d6b6c680be7d8dcd186b42dbd6899de67852a8d9 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Fri, 16 Aug 2013 14:14:50 -0700 Subject: [PATCH 098/136] comment in the test to make it more understandable --- core/src/test/scala/spark/RDDSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 0532435288..da5f78aef5 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -221,7 +221,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance) // TDD: Test for later when we have implemented functionality to get locality from DAGScheduler - val data3 = sc.makeRDD(blocks).map( i => i*2 ) + val data3 = sc.makeRDD(blocks).map( i => i*2 ) // derived RDD to test *current* pref locs val coalesced3 = data3.coalesce(numMachines*2) val minLocality2 = coalesced3.partitions .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) From 890ea6ba792f9d3d07916a0833c3a83f0150e8cc Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Fri, 16 Aug 2013 17:52:53 -0700 Subject: [PATCH 099/136] making CoalescedRDDPartition public --- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala 
b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 01d4bcadc2..a5e3d74447 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -21,9 +21,8 @@ import spark._ import java.io.{ObjectOutputStream, IOException} import scala.collection.mutable import scala.Some -import spark.rdd.CoalescedRDDPartition -private[spark] case class CoalescedRDDPartition( +case class CoalescedRDDPartition( index: Int, @transient rdd: RDD[_], parentsIndices: Array[Int], From f1c853d76dce4fbc34f580be0a3ae15cc5be9c80 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Sun, 18 Aug 2013 20:42:35 -0700 Subject: [PATCH 100/136] fixed Matei's comments --- core/src/main/scala/spark/SparkContext.scala | 2 +- .../main/scala/spark/rdd/CoalescedRDD.scala | 145 ++++++++++-------- core/src/test/scala/spark/RDDSuite.scala | 25 +-- 3 files changed, 99 insertions(+), 73 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 544c971efc..7639749ecb 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -620,7 +620,7 @@ class SparkContext( * @param partition to be looked up for locality * @return list of preferred locations for the partition */ - def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = + private [spark] def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = dagScheduler.getPreferredLocs(rdd, partition) /** diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index a5e3d74447..c475b7a8aa 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -21,13 +21,21 @@ import spark._ import java.io.{ObjectOutputStream, IOException} import scala.collection.mutable import scala.Some +import scala.collection.mutable.ArrayBuffer +/** + * Class that captures a coalesced RDD by essentially keeping track of parent partitions + * @param index of this coalesced partition + * @param rdd which it belongs to + * @param parentsIndices list of indices in the parent that have been coalesced into this partition + * @param preferredLocation the preferred location for this partition + */ case class CoalescedRDDPartition( - index: Int, - @transient rdd: RDD[_], - parentsIndices: Array[Int], - @transient preferredLocation: String = "" - ) extends Partition { + index: Int, + @transient rdd: RDD[_], + parentsIndices: Array[Int], + @transient preferredLocation: String = "" + ) extends Partition { var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_)) @throws(classOf[IOException]) @@ -43,45 +51,27 @@ case class CoalescedRDDPartition( * @return locality of this coalesced partition between 0 and 1 */ def localFraction: Double = { - val loc = parents.count(p => rdd.context.getPreferredLocs(rdd, p.index) - .contains(preferredLocation)) + val loc = parents.count(p => + rdd.context.getPreferredLocs(rdd, p.index).contains(preferredLocation)) + if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) } } /** - * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of - * this RDD computes one or more of the parent ones. Will produce exactly `maxPartitions` if the - * parent had more than this many partitions, or fewer if the parent had fewer. 
- * - * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, - * or to avoid having a large number of small tasks when processing a directory with many files. - * - * If there is no locality information (no preferredLocations) in the parent, then the coalescing - * is very simple: chunk parents that are close in the Array in chunks. - * If there is locality information, it proceeds to pack them with the following three goals: - * - * (1) Balance the groups so they roughly have the same number of parent partitions - * (2) Achieve locality per partition, i.e. find one machine which most parent partitions prefer - * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (problem is likely NP-hard) - * (4) Balance preferred machines, i.e. avoid as much as possible picking the same preferred machine - * - * Furthermore, it is assumed that the parent RDD may have many partitions, e.g. 100 000. - * We assume the final number of desired partitions is small, e.g. less than 1000. - * - * The algorithm tries to assign unique preferred machines to each partition. If the number of - * desired partitions is greater than the number of preferred machines (can happen), it needs to - * start picking duplicate preferred machines. This is determined using coupon collector estimation - * (2n log(n)). The load balancing is done using power-of-two randomized bins-balls with one twist: - * it tries to also achieve locality. This is done by allowing a slack (balanceSlack) between two - * bins. If two bins are within the slack in terms of balance, the algorithm will assign partitions - * according to locality. (contact alig for questions) - * + * Represents a coalesced RDD that has fewer partitions than its parent RDD + * This class uses the PartitionCoalescer class to find a good partitioning of the parent RDD + * so that each new partition has roughly the same number of parent partitions and that + * the preferred location of each new partition overlaps with as many preferred locations of its + * parent partitions + * @param prev RDD to be coalesced + * @param maxPartitions number of desired partitions in the coalesced RDD + * @param balanceSlack used to trade-off balance and locality. 1.0 is all locality, 0 is all balance */ class CoalescedRDD[T: ClassManifest]( - @transient var prev: RDD[T], - maxPartitions: Int, - balanceSlack: Double = 0.10) + @transient var prev: RDD[T], + maxPartitions: Int, + balanceSlack: Double = 0.10) extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { @@ -128,44 +118,75 @@ class CoalescedRDD[T: ClassManifest]( } -private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], - balanceSlack: Double) { - protected def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size - protected def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = +/** + * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of + * this RDD computes one or more of the parent ones. Will produce exactly `maxPartitions` if the + * parent had more than this many partitions, or fewer if the parent had fewer. + * + * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, + * or to avoid having a large number of small tasks when processing a directory with many files. 
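A typical use of that pattern, sketched as a hypothetical snippet (paths and partition counts are made up, and `sc` is assumed to be a SparkContext):

    // Filtering shrinks the data, so collapse the many small partitions before writing out.
    val logs = sc.textFile("hdfs://namenode/logs/2013/08/*")   // e.g. tens of thousands of small files
    val errors = logs.filter(_.contains("ERROR"))
    errors.coalesce(500).saveAsTextFile("hdfs://namenode/logs-errors")  // ~500 locality-aware partitions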
+ * + * If there is no locality information (no preferredLocations) in the parent, then the coalescing + * is very simple: chunk parents that are close in the Array in chunks. + * If there is locality information, it proceeds to pack them with the following three goals: + * + * (1) Balance the groups so they roughly have the same number of parent partitions + * (2) Achieve locality per partition, i.e. find one machine which most parent partitions prefer + * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (problem is likely NP-hard) + * (4) Balance preferred machines, i.e. avoid as much as possible picking the same preferred machine + * + * Furthermore, it is assumed that the parent RDD may have many partitions, e.g. 100 000. + * We assume the final number of desired partitions is small, e.g. less than 1000. + * + * The algorithm tries to assign unique preferred machines to each partition. If the number of + * desired partitions is greater than the number of preferred machines (can happen), it needs to + * start picking duplicate preferred machines. This is determined using coupon collector estimation + * (2n log(n)). The load balancing is done using power-of-two randomized bins-balls with one twist: + * it tries to also achieve locality. This is done by allowing a slack (balanceSlack) between two + * bins. If two bins are within the slack in terms of balance, the algorithm will assign partitions + * according to locality. (contact alig for questions) + * + */ + +private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { + + def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size + def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get) - protected val rnd = new scala.util.Random(7919) // keep this class deterministic + val rnd = new scala.util.Random(7919) // keep this class deterministic // each element of groupArr represents one coalesced partition - protected val groupArr = mutable.ArrayBuffer[PartitionGroup]() + val groupArr = ArrayBuffer[PartitionGroup]() // hash used to check whether some machine is already in groupArr - protected val groupHash = mutable.Map[String, mutable.ListBuffer[PartitionGroup]]() + val groupHash = mutable.Map[String, ArrayBuffer[PartitionGroup]]() // hash used for the first maxPartitions (to avoid duplicates) - protected val initialHash = mutable.Map[Partition, Boolean]() + val initialHash = mutable.Set[Partition]() // determines the tradeoff between load-balancing the partitions sizes and their locality // e.g. 
balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality - protected val slack = (balanceSlack * prev.partitions.size).toInt + val slack = (balanceSlack * prev.partitions.size).toInt - protected var noLocality = true // if true if no preferredLocations exists for parent RDD + var noLocality = true // if true if no preferredLocations exists for parent RDD // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones) - protected def currentPreferredLocations(rdd: RDD[_], part: Partition) : Seq[String] = + def currentPreferredLocations(rdd: RDD[_], part: Partition) : Seq[String] = rdd.context.getPreferredLocs(rdd, part.index) // this class just keeps iterating and rotating infinitely over the partitions of the RDD // next() returns the next preferred machine that a partition is replicated on // the rotator first goes through the first replica copy of each partition, then second, third // the iterators return type is a tuple: (replicaString, partition) - protected class RotateLocations(prev: RDD[_]) extends Iterator[(String, Partition)] { + class LocationIterator(prev: RDD[_]) extends Iterator[(String, Partition)] { + + var it: Iterator[(String, Partition)] = resetIterator() - protected var it: Iterator[(String, Partition)] = resetIterator() override val isEmpty = !it.hasNext // initializes/resets to start iterating from the beginning - protected def resetIterator() = { + def resetIterator() = { val i1 = prev.partitions.view.map{ p: Partition => { if (currentPreferredLocations(prev, p).length > 0) Some((currentPreferredLocations(prev, p)(0),p)) else None } } @@ -177,10 +198,14 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], Some((currentPreferredLocations(prev, p)(2),p)) else None } } val res = List(i1,i2,i3) res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) + + // prev.partitions.iterator.map{p: Partition => { (currentPreferredLocations(prev, p)(0),p) }} ++ + // prev.partitions.iterator.map{p: Partition => { (currentPreferredLocations(prev, p)(1),p) }} ++ + // prev.partitions.iterator.map{p: Partition => { (currentPreferredLocations(prev, p)(2),p) }} } // hasNext() is false iff there are no preferredLocations for any of the partitions of the RDD - def hasNext(): Boolean = !isEmpty + def hasNext(): Boolean = { !isEmpty } // return the next preferredLocation of some partition of the RDD def next(): (String, Partition) = { @@ -199,14 +224,14 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], * @param key string representing a partitioned group on preferred machine key * @return Option of PartitionGroup that has least elements for key */ - protected def getLeastGroupHash(key: String): Option[PartitionGroup] = { + def getLeastGroupHash(key: String): Option[PartitionGroup] = { groupHash.get(key).map(_.sortWith(compare).head) } def addPartToPGroup(part : Partition, pgroup : PartitionGroup) : Boolean = { if (!initialHash.contains(part)) { pgroup.list += part // already assign this element - initialHash += (part -> true) // needed to avoid assigning partitions to multiple buckets + initialHash += part // needed to avoid assigning partitions to multiple buckets true } else { false } } @@ -217,8 +242,8 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], * until it has seen most of the preferred locations (2 * n log(n)) * @param targetLen */ - protected def setupGroups(targetLen: Int) { - val rotIt = new RotateLocations(prev) + 
def setupGroups(targetLen: Int) { + val rotIt = new LocationIterator(prev) // deal with empty case, just create targetLen partition groups with no preferred location if (!rotIt.hasNext()) { @@ -243,7 +268,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], val pgroup = PartitionGroup(nxt_replica) groupArr += pgroup addPartToPGroup(nxt_part, pgroup) - groupHash += (nxt_replica -> (mutable.ListBuffer(pgroup))) // list in case we have multiple + groupHash += (nxt_replica -> (ArrayBuffer(pgroup))) // list in case we have multiple numCreated += 1 } } @@ -270,7 +295,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], * @param p partition (ball to be thrown) * @return partition group (bin to be put in) */ - protected def pickBin(p: Partition): PartitionGroup = { + def pickBin(p: Partition): PartitionGroup = { val pref = prev.context.getPreferredLocs(prev, p.index). map(getLeastGroupHash(_)).sortWith(compare) // least loaded of the pref locations val prefPart = if (pref == Nil) None else pref.head @@ -290,7 +315,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], } } - protected def throwBalls() { + def throwBalls() { if (noLocality) { // no preferredLocations in parent RDD, no randomization needed if (maxPartitions > groupArr.size) { // just return prev.partitions for ((p,i) <- prev.partitions.zipWithIndex) { @@ -310,7 +335,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], } } - protected def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray /** * Runs the packing algorithm and returns an array of PartitionGroups that if possible are diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index da5f78aef5..14f7c62782 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -176,7 +176,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("cogrouped RDDs with locality") { // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5 - val data = sc.makeRDD((1 to 9).map( i => (i, (i to (i+2)).map{ j => "m" + (j%6)} ))) + val data = sc.makeRDD((1 to 9).map(i => (i, (i to (i+2)).map{ j => "m" + (j%6)}))) val coalesced1 = data.coalesce(3) assert(coalesced1.collect().toList.sorted === (1 to 9).toList, "Data got *lost* in coalescing") @@ -184,7 +184,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(splits.length == 3, "Supposed to coalesce to 3 but got " + splits.length) assert(splits.foldLeft(true) - ( (x,y) => if (!x) false else y.length >= 1) === true, "Some partitions were empty") + ((x,y) => if (!x) false else y.length >= 1) === true, "Some partitions were empty") // If we try to coalesce into more partitions than the original RDD, it should just // keep the original number of partitions. @@ -193,7 +193,9 @@ class RDDSuite extends FunSuite with SharedSparkContext { (x, y) => if (x.isEmpty) false else if (y.isEmpty) true else x(0) < y(0)) === (1 to 9). 
map(x => List(x)).toList, "Tried coalescing 9 partitions to 20 but didn't get 9 back") + } + test("cogrouped RDDs with locality, large scale (10K partitions)") { // large scale experiment import collection.mutable val rnd = scala.util.Random @@ -202,30 +204,29 @@ class RDDSuite extends FunSuite with SharedSparkContext { val machines = mutable.ListBuffer[String]() (1 to numMachines).foreach(machines += "m"+_) - val blocks = (1 to partitions).map( i => (i, (i to (i+2)) - .map{ j => machines(rnd.nextInt(machines.size)) } )) + val blocks = (1 to partitions).map(i => (i, (i to (i+2)) + .map{ j => machines(rnd.nextInt(machines.size))})) - val data2 = sc.makeRDD(blocks) // .map( i => i*2 ) + val data2 = sc.makeRDD(blocks) val coalesced2 = data2.coalesce(numMachines*2) // test that you get over 90% locality in each group val minLocality = coalesced2.partitions - .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) - .foldLeft(1.)( (perc, loc) => math.min(perc,loc) ) + .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction) + .foldLeft(1.)((perc, loc) => math.min(perc,loc)) assert(minLocality >= 0.90, "Expected 90% locality but got " + (minLocality*100.).toInt + "%") // test that the groups are load balanced with 100 +/- 20 elements in each val maxImbalance = coalesced2.partitions - .map( part => part.asInstanceOf[CoalescedRDDPartition].parents.size ) + .map(part => part.asInstanceOf[CoalescedRDDPartition].parents.size) .foldLeft(0)((dev, curr) => math.max(math.abs(100-curr),dev)) assert(maxImbalance <= 20, "Expected 100 +/- 20 per partition, but got " + maxImbalance) - // TDD: Test for later when we have implemented functionality to get locality from DAGScheduler - val data3 = sc.makeRDD(blocks).map( i => i*2 ) // derived RDD to test *current* pref locs + val data3 = sc.makeRDD(blocks).map(i => i*2) // derived RDD to test *current* pref locs val coalesced3 = data3.coalesce(numMachines*2) val minLocality2 = coalesced3.partitions - .map( part => part.asInstanceOf[CoalescedRDDPartition].localFraction ) - .foldLeft(1.)( (perc, loc) => math.min(perc,loc) ) + .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction) + .foldLeft(1.)((perc, loc) => math.min(perc,loc)) assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " + (minLocality2*100.).toInt + "%") } From a75a64eade3f540e72fc0bc646ba74a689f03f3e Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Mon, 19 Aug 2013 12:09:08 -0700 Subject: [PATCH 101/136] Fixed almost all of Matei's feedback --- .../main/scala/spark/rdd/CoalescedRDD.scala | 43 ++++++++----------- core/src/test/scala/spark/RDDSuite.scala | 14 +++--- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index c475b7a8aa..04bb8089a4 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -79,7 +79,7 @@ class CoalescedRDD[T: ClassManifest]( pc.run().zipWithIndex.map { case (pg, i) => - val ids = pg.list.map(_.index).toArray + val ids = pg.arr.map(_.index).toArray new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) } } @@ -109,26 +109,21 @@ class CoalescedRDD[T: ClassManifest]( * @return the machine most preferred by split */ override def getPreferredLocations(partition: Partition): Seq[String] = { - if (partition.isInstanceOf[CoalescedRDDPartition]) { - List(partition.asInstanceOf[CoalescedRDDPartition].preferredLocation) - } else { - 
super.getPreferredLocations(partition) - } + List(partition.asInstanceOf[CoalescedRDDPartition].preferredLocation) } - } /** * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of - * this RDD computes one or more of the parent ones. Will produce exactly `maxPartitions` if the - * parent had more than this many partitions, or fewer if the parent had fewer. + * this RDD computes one or more of the parent ones. It will produce exactly `maxPartitions` if the + * parent had more than maxPartitions, or fewer if the parent had fewer. * * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, * or to avoid having a large number of small tasks when processing a directory with many files. * * If there is no locality information (no preferredLocations) in the parent, then the coalescing * is very simple: chunk parents that are close in the Array in chunks. - * If there is locality information, it proceeds to pack them with the following three goals: + * If there is locality information, it proceeds to pack them with the following four goals: * * (1) Balance the groups so they roughly have the same number of parent partitions * (2) Achieve locality per partition, i.e. find one machine which most parent partitions prefer @@ -172,7 +167,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc var noLocality = true // if true if no preferredLocations exists for parent RDD // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones) - def currentPreferredLocations(rdd: RDD[_], part: Partition) : Seq[String] = + def currentPreferredLocations(rdd: RDD[_], part: Partition): Seq[String] = rdd.context.getPreferredLocs(rdd, part.index) // this class just keeps iterating and rotating infinitely over the partitions of the RDD @@ -199,9 +194,9 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc val res = List(i1,i2,i3) res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) - // prev.partitions.iterator.map{p: Partition => { (currentPreferredLocations(prev, p)(0),p) }} ++ - // prev.partitions.iterator.map{p: Partition => { (currentPreferredLocations(prev, p)(1),p) }} ++ - // prev.partitions.iterator.map{p: Partition => { (currentPreferredLocations(prev, p)(2),p) }} + // prev.partitions.iterator.map(p: Partition => { (currentPreferredLocations(prev, p)(0),p) }) ++ + // prev.partitions.iterator.map(p: Partition => { (currentPreferredLocations(prev, p)(1),p) }) ++ + // prev.partitions.iterator.map(p: Partition => { (currentPreferredLocations(prev, p)(2),p) }) } // hasNext() is false iff there are no preferredLocations for any of the partitions of the RDD @@ -228,9 +223,9 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc groupHash.get(key).map(_.sortWith(compare).head) } - def addPartToPGroup(part : Partition, pgroup : PartitionGroup) : Boolean = { + def addPartToPGroup(part: Partition, pgroup: PartitionGroup): Boolean = { if (!initialHash.contains(part)) { - pgroup.list += part // already assign this element + pgroup.arr += part // already assign this element initialHash += part // needed to avoid assigning partitions to multiple buckets true } else { false } @@ -319,30 +314,30 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc if (noLocality) { // no preferredLocations in parent RDD, no randomization needed if (maxPartitions > 
groupArr.size) { // just return prev.partitions for ((p,i) <- prev.partitions.zipWithIndex) { - groupArr(i).list += p + groupArr(i).arr += p } } else { // no locality available, then simply split partitions based on positions in array - (0 until maxPartitions).foreach { i => + for(i <- 0 until maxPartitions) { val rangeStart = ((i.toLong * prev.partitions.length) / maxPartitions).toInt val rangeEnd = (((i.toLong + 1) * prev.partitions.length) / maxPartitions).toInt - (rangeStart until rangeEnd).foreach{ j => groupArr(i).list += prev.partitions(j) } + (rangeStart until rangeEnd).foreach{ j => groupArr(i).arr += prev.partitions(j) } } } } else { for (p <- prev.partitions if (!initialHash.contains(p))) { // throw every partition into group - pickBin(p).list += p + pickBin(p).arr += p } } } - def getPartitions : Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + def getPartitions: Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray /** * Runs the packing algorithm and returns an array of PartitionGroups that if possible are * load balanced and grouped by locality * @return array of partition groups */ - def run() : Array[PartitionGroup] = { + def run(): Array[PartitionGroup] = { setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) throwBalls() // assign partitions (balls) to each group (bins) getPartitions @@ -350,7 +345,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc } private[spark] case class PartitionGroup(prefLoc: String = "") { - var list = mutable.ListBuffer[Partition]() + var arr = mutable.ArrayBuffer[Partition]() - def size = list.size + def size = arr.size } diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 14f7c62782..64e2c0605b 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -181,7 +181,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(coalesced1.collect().toList.sorted === (1 to 9).toList, "Data got *lost* in coalescing") val splits = coalesced1.glom().collect().map(_.toList).toList - assert(splits.length == 3, "Supposed to coalesce to 3 but got " + splits.length) + assert(splits.length === 3, "Supposed to coalesce to 3 but got " + splits.length) assert(splits.foldLeft(true) ((x,y) => if (!x) false else y.length >= 1) === true, "Some partitions were empty") @@ -189,10 +189,10 @@ class RDDSuite extends FunSuite with SharedSparkContext { // If we try to coalesce into more partitions than the original RDD, it should just // keep the original number of partitions. val coalesced4 = data.coalesce(20) - assert(coalesced4.glom().collect().map(_.toList).toList.sortWith( - (x, y) => if (x.isEmpty) false else if (y.isEmpty) true else x(0) < y(0)) === (1 to 9). - map(x => List(x)).toList, "Tried coalescing 9 partitions to 20 but didn't get 9 back") - + val listOfLists = coalesced4.glom().collect().map(_.toList).toList + val sortedList = listOfLists.sortWith{ (x, y) => !x.isEmpty && (y.isEmpty || (x(0) < y(0))) } + assert( sortedList === (1 to 9). 
+ map{x => List(x)}.toList, "Tried coalescing 9 partitions to 20 but didn't get 9 back") } test("cogrouped RDDs with locality, large scale (10K partitions)") { @@ -204,8 +204,8 @@ class RDDSuite extends FunSuite with SharedSparkContext { val machines = mutable.ListBuffer[String]() (1 to numMachines).foreach(machines += "m"+_) - val blocks = (1 to partitions).map(i => (i, (i to (i+2)) - .map{ j => machines(rnd.nextInt(machines.size))})) + val blocks = (1 to partitions).map(i => + { (i, Array.fill(3)(machines(rnd.nextInt(machines.size))).toList) } ) val data2 = sc.makeRDD(blocks) val coalesced2 = data2.coalesce(numMachines*2) From 9192c358e40f2b3954d9939d7e153e3dd4d4ba75 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Mon, 19 Aug 2013 13:13:24 -0700 Subject: [PATCH 102/136] simpler code --- .../main/scala/spark/rdd/CoalescedRDD.scala | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 04bb8089a4..25c48caad3 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -167,7 +167,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc var noLocality = true // if true if no preferredLocations exists for parent RDD // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones) - def currentPreferredLocations(rdd: RDD[_], part: Partition): Seq[String] = + def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = rdd.context.getPreferredLocs(rdd, part.index) // this class just keeps iterating and rotating infinitely over the partitions of the RDD @@ -182,21 +182,12 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc // initializes/resets to start iterating from the beginning def resetIterator() = { - val i1 = prev.partitions.view.map{ p: Partition => - { if (currentPreferredLocations(prev, p).length > 0) - Some((currentPreferredLocations(prev, p)(0),p)) else None } } - val i2 = prev.partitions.view.map{ p: Partition => - { if (currentPreferredLocations(prev, p).length > 1) - Some((currentPreferredLocations(prev, p)(1),p)) else None } } - val i3 = prev.partitions.view.map{ p: Partition => - { if (currentPreferredLocations(prev, p).length > 2) - Some((currentPreferredLocations(prev, p)(2),p)) else None } } - val res = List(i1,i2,i3) - res.view.flatMap(x => x).flatten.iterator // fuses the 3 iterators (1st replica, 2nd, 3rd) - - // prev.partitions.iterator.map(p: Partition => { (currentPreferredLocations(prev, p)(0),p) }) ++ - // prev.partitions.iterator.map(p: Partition => { (currentPreferredLocations(prev, p)(1),p) }) ++ - // prev.partitions.iterator.map(p: Partition => { (currentPreferredLocations(prev, p)(2),p) }) + val iterators = (0 to 2).map( x => + prev.partitions.iterator.flatMap(p => { + if (currPrefLocs(prev, p).size > x) Some((currPrefLocs(prev, p)(x), p)) else None + } ) + ) + iterators.reduceLeft((x, y) => x ++ y) } // hasNext() is false iff there are no preferredLocations for any of the partitions of the RDD From 7b123b3126d555237f31a0787411c4bbc1abd39a Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Mon, 19 Aug 2013 18:52:43 -0700 Subject: [PATCH 103/136] Simpler code --- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 6 +++--- core/src/test/scala/spark/RDDSuite.scala | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala 
b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index 25c48caad3..b83d443de3 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -167,8 +167,8 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc var noLocality = true // if true if no preferredLocations exists for parent RDD // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones) - def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = - rdd.context.getPreferredLocs(rdd, part.index) + def currPrefLocs(part: Partition): Seq[String] = + prev.context.getPreferredLocs(prev, part.index) // this class just keeps iterating and rotating infinitely over the partitions of the RDD // next() returns the next preferred machine that a partition is replicated on @@ -184,7 +184,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc def resetIterator() = { val iterators = (0 to 2).map( x => prev.partitions.iterator.flatMap(p => { - if (currPrefLocs(prev, p).size > x) Some((currPrefLocs(prev, p)(x), p)) else None + if (currPrefLocs(p).size > x) Some((currPrefLocs(p)(x), p)) else None } ) ) iterators.reduceLeft((x, y) => x ++ y) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 64e2c0605b..9e73703371 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -183,8 +183,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { val splits = coalesced1.glom().collect().map(_.toList).toList assert(splits.length === 3, "Supposed to coalesce to 3 but got " + splits.length) - assert(splits.foldLeft(true) - ((x,y) => if (!x) false else y.length >= 1) === true, "Some partitions were empty") + assert(splits.forall(_.length >= 1) === true, "Some partitions were empty") // If we try to coalesce into more partitions than the original RDD, it should just // keep the original number of partitions. 
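The refactoring in the last few patches is easier to follow against a standalone model of the rule described in the PartitionCoalescer scaladoc: draw two random bins, keep the emptier one, and let a partition's preferred machine override that choice only while the preferred bin stays within the slack allowance. The sketch below is not Spark code (Bin, the machine names and the slack value are invented for illustration), but it mirrors the pickBin logic shown in the diffs above.

    import scala.util.Random

    // Standalone sketch of "power-of-two choices with a locality slack".
    // Bin stands in for PartitionGroup; everything here is illustrative only.
    object TwoChoicesWithSlack {
      final case class Bin(prefLoc: String) { var size: Int = 0 }

      // Decide which bin a new partition should join.
      def pickBin(bins: IndexedSeq[Bin], preferred: Option[Bin], slack: Int, rnd: Random): Bin = {
        val r1 = bins(rnd.nextInt(bins.size))
        val r2 = bins(rnd.nextInt(bins.size))
        val minOfTwo = if (r1.size < r2.size) r1 else r2       // classic power-of-two choices
        preferred match {
          // locality wins while the preferred bin is less than `slack` fuller than the random pick
          case Some(p) if p.size < minOfTwo.size + slack => p
          case _ => minOfTwo
        }
      }

      def main(args: Array[String]): Unit = {
        val rnd  = new Random(7919)                            // fixed seed, like the real coalescer
        val bins = Vector(Bin("m0"), Bin("m1"), Bin("m2"))
        for (i <- 0 until 90) {                                // 90 fake partitions, each preferring one machine
          val chosen = pickBin(bins, Some(bins(i % 3)), 3, rnd)
          chosen.size += 1
        }
        println(bins.map(b => s"${b.prefLoc} -> ${b.size}").mkString(", "))  // evenly spread, about 30 each
      }
    }

With a slack of zero the rule degenerates to pure load balancing; with a very large slack it degenerates to pure locality, which is the trade-off the balanceSlack constructor parameter exposes (in the real code the slack is balanceSlack times the number of parent partitions, not an absolute count as in this sketch).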
From 5db41919b5aafcad050726601c6f3be8e0bf879a Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Mon, 19 Aug 2013 19:11:02 -0700 Subject: [PATCH 104/136] Added a test to make sure no locality preferences are ignored --- core/src/test/scala/spark/RDDSuite.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 9e73703371..3c1b387d16 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -175,6 +175,11 @@ class RDDSuite extends FunSuite with SharedSparkContext { null) } test("cogrouped RDDs with locality") { + val data3 = sc.makeRDD(List((1,List("a","c")), (2,List("a","b","c")), (3,List("b")))) + val coalesced3 = data3.coalesce(3) + val list3 = coalesced3.partitions.map(p => p.asInstanceOf[CoalescedRDDPartition]) + assert(list3.map(p => p.preferredLocation).length === 3, "Locality preferences are dropped") + // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5 val data = sc.makeRDD((1 to 9).map(i => (i, (i to (i+2)).map{ j => "m" + (j%6)}))) val coalesced1 = data.coalesce(3) From c0942a710f25c9c690761b0814a07deacd4df595 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Mon, 19 Aug 2013 19:14:30 -0700 Subject: [PATCH 105/136] Bug in test fixed --- core/src/test/scala/spark/RDDSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 3c1b387d16..e306952bbd 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -176,9 +176,9 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("cogrouped RDDs with locality") { val data3 = sc.makeRDD(List((1,List("a","c")), (2,List("a","b","c")), (3,List("b")))) - val coalesced3 = data3.coalesce(3) - val list3 = coalesced3.partitions.map(p => p.asInstanceOf[CoalescedRDDPartition]) - assert(list3.map(p => p.preferredLocation).length === 3, "Locality preferences are dropped") + val coal3 = data3.coalesce(3) + val list3 = coal3.partitions.map(p => p.asInstanceOf[CoalescedRDDPartition].preferredLocation) + assert(list3.sorted === Array("a","b","c"), "Locality preferences are dropped") // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5 val data = sc.makeRDD((1 to 9).map(i => (i, (i to (i+2)).map{ j => "m" + (j%6)}))) From db4bc55bef47aaac85c280d5c7d0d82217101b84 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Tue, 20 Aug 2013 12:17:55 -0700 Subject: [PATCH 106/136] indent --- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index b83d443de3..f880cea2ad 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -260,7 +260,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc } while (numCreated < targetLen) { // if we don't have enough partition groups, create duplicates - var (nxt_replica, nxt_part) = rotIt.next() + var (nxt_replica, nxt_part) = rotIt.next() val pgroup = PartitionGroup(nxt_replica) groupArr += pgroup groupHash.get(nxt_replica).get += pgroup From 5cd21c41950e3004d3326bbc56286285531063f9 Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Tue, 20 Aug 2013 13:59:48 -0700 Subject: [PATCH 107/136] added curly braces to make 
the code more consistent --- core/src/main/scala/spark/SparkContext.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 7639749ecb..a8cc4f3eb8 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -620,8 +620,9 @@ class SparkContext( * @param partition to be looked up for locality * @return list of preferred locations for the partition */ - private [spark] def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = + private [spark] def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = { dagScheduler.getPreferredLocs(rdd, partition) + } /** * Adds a JAR dependency for all tasks to be executed on this SparkContext in the future. From f20ed14e87aa1b9ff148f44b590ffd3c9d024f3c Mon Sep 17 00:00:00 2001 From: Ali Ghodsi Date: Tue, 20 Aug 2013 16:21:43 -0700 Subject: [PATCH 108/136] Merged in from upstream to use TaskLocation instead of strings --- core/src/main/scala/spark/SparkContext.scala | 9 ++++++--- core/src/main/scala/spark/rdd/CoalescedRDD.scala | 10 +++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index a8cc4f3eb8..23dfbcd604 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -56,8 +56,7 @@ import spark.deploy.LocalSparkCluster import spark.partial.{ApproximateEvaluator, PartialResult} import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD, OrderedRDDFunctions} -import spark.scheduler.{DAGScheduler, DAGSchedulerSource, ResultTask, ShuffleMapTask, SparkListener, - SplitInfo, Stage, StageInfo, TaskScheduler} +import spark.scheduler._ import spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend, ClusterScheduler, Schedulable, SchedulingMode} import spark.scheduler.local.LocalScheduler @@ -65,6 +64,10 @@ import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend import spark.storage.{StorageStatus, StorageUtils, RDDInfo, BlockManagerSource} import spark.ui.SparkUI import spark.util.{MetadataCleaner, TimeStampedHashMap} +import scala.Some +import spark.scheduler.StageInfo +import spark.storage.RDDInfo +import spark.storage.StorageStatus /** * Main entry point for Spark functionality. 
A SparkContext represents the connection to a Spark @@ -620,7 +623,7 @@ class SparkContext( * @param partition to be looked up for locality * @return list of preferred locations for the partition */ - private [spark] def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = { + private [spark] def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = { dagScheduler.getPreferredLocs(rdd, partition) } diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala index f880cea2ad..e612d026b2 100644 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala @@ -52,7 +52,7 @@ case class CoalescedRDDPartition( */ def localFraction: Double = { val loc = parents.count(p => - rdd.context.getPreferredLocs(rdd, p.index).contains(preferredLocation)) + rdd.context.getPreferredLocs(rdd, p.index).map(tl => tl.host).contains(preferredLocation)) if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) } @@ -167,8 +167,9 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc var noLocality = true // if true if no preferredLocations exists for parent RDD // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones) - def currPrefLocs(part: Partition): Seq[String] = - prev.context.getPreferredLocs(prev, part.index) + def currPrefLocs(part: Partition): Seq[String] = { + prev.context.getPreferredLocs(prev, part.index).map(tl => tl.host) + } // this class just keeps iterating and rotating infinitely over the partitions of the RDD // next() returns the next preferred machine that a partition is replicated on @@ -282,8 +283,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc * @return partition group (bin to be put in) */ def pickBin(p: Partition): PartitionGroup = { - val pref = prev.context.getPreferredLocs(prev, p.index). 
- map(getLeastGroupHash(_)).sortWith(compare) // least loaded of the pref locations + val pref = currPrefLocs(p).map(getLeastGroupHash(_)).sortWith(compare) // least loaded pref locs val prefPart = if (pref == Nil) None else pref.head val r1 = rnd.nextInt(groupArr.size) From 5eea613ec08c24ca4d4d5f0a8beebbe424334605 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Tue, 20 Aug 2013 16:49:18 -0700 Subject: [PATCH 109/136] Removed meaningless types --- core/src/main/scala/spark/util/MutablePair.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/util/MutablePair.scala b/core/src/main/scala/spark/util/MutablePair.scala index 3063806e83..78d404e66b 100644 --- a/core/src/main/scala/spark/util/MutablePair.scala +++ b/core/src/main/scala/spark/util/MutablePair.scala @@ -32,5 +32,5 @@ case class MutablePair[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/ { override def toString = "(" + _1 + "," + _2 + ")" - override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[T1, T2]] + override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[_,_]] } From 51a1a0c602481620dd914f3024dee52c3312058d Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 20 Aug 2013 22:14:52 -0700 Subject: [PATCH 110/136] Bump spark version --- ec2/spark_ec2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 182cbbbb7c..6c48d9765f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -68,7 +68,7 @@ def parse_args(): parser.add_option("-a", "--ami", help="Amazon Machine Image ID to use") - parser.add_option("-v", "--spark-version", default="0.7.2", + parser.add_option("-v", "--spark-version", default="0.7.3", help="Version of Spark to use: 'X.Y.Z' or a specific git hash") parser.add_option("--spark-git-repo", default="https://github.com/mesos/spark", @@ -158,7 +158,7 @@ def is_active(instance): # Return correct versions of Spark and Shark, given the supplied Spark version def get_spark_shark_version(opts): - spark_shark_map = {"0.7.2": "0.7.0"} + spark_shark_map = {"0.7.3": "0.7.0"} version = opts.spark_version.replace("v", "") if version not in spark_shark_map: print >> stderr, "Don't know about Spark version: %s" % version From 53b1c30607a9b19e795fd5b6107dfefb83820282 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 20 Aug 2013 22:57:11 -0700 Subject: [PATCH 111/136] Update docs for Spark UI port --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index dff08a06f5..b125eeb03c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -146,7 +146,7 @@ Apart from these, the following properties are also available, and may be useful spark.ui.port - 33000 + 3030 Port for your application's dashboard, which shows memory and workload data From af602ba9d31ccf5a71b77904fce31c3c1376eeac Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 21 Aug 2013 11:38:24 -0700 Subject: [PATCH 112/136] Downgraded default build hadoop version to 1.0.4. 
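With the default back at 1.0.4, applications that embed Spark should keep declaring the hadoop-client artifact that matches their cluster instead of relying on the build default. A hedged sbt fragment; the coordinates follow the 0.8.0-SNAPSHOT layout used elsewhere in this series, and the version numbers are placeholders only:

    // Illustrative build.sbt fragment for a downstream application; versions are
    // placeholders to be replaced with your Spark release and your cluster's Hadoop.
    libraryDependencies ++= Seq(
      "org.spark-project" %% "spark-core"    % "0.8.0-SNAPSHOT",
      "org.apache.hadoop"  % "hadoop-client" % "1.0.4"
    )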
--- pom.xml | 2 +- project/SparkBuild.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index fc0b314070..587b42c55a 100644 --- a/pom.xml +++ b/pom.xml @@ -74,7 +74,7 @@ 2.0.3 1.7.2 1.2.17 - 1.2.1 + 1.0.4 64m diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 831bfbed78..81080741ca 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -28,7 +28,7 @@ object SparkBuild extends Build { // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. // Note that these variables can be set through the environment variables // SPARK_HADOOP_VERSION and SPARK_WITH_YARN. - val DEFAULT_HADOOP_VERSION = "1.2.1" + val DEFAULT_HADOOP_VERSION = "1.0.4" val DEFAULT_WITH_YARN = false // HBase version; set as appropriate. From 111b2741fd4bacd5f0b31add22acd28d7d884299 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 21 Aug 2013 11:54:10 -0700 Subject: [PATCH 113/136] Change default SPARK_HADOOP_VERSION in make-distribution.sh too --- make-distribution.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make-distribution.sh b/make-distribution.sh index 55dc22b992..70aff418c7 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -46,7 +46,7 @@ export TERM=dumb # Prevents color codes in SBT output VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/') # Initialize defaults -SPARK_HADOOP_VERSION=1.2.1 +SPARK_HADOOP_VERSION=1.0.4 SPARK_WITH_YARN=false MAKE_TGZ=false From 9c6f8df30f70ee8ee3a8c132dec251c355db2c28 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 12:57:56 -0700 Subject: [PATCH 114/136] Update jekyll plugin to match docs/README.md --- docs/_plugins/copy_api_dirs.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index 217254c59f..c574ea7f5c 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -18,7 +18,7 @@ require 'fileutils' include FileUtils -if ENV['SKIP_API'] != '1' +if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1') # Build Scaladoc for Java/Scala projects = ["core", "examples", "repl", "bagel", "streaming", "mllib"] From 31644a011c1c61ca010b91dc83373e15960f4d23 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 13:24:28 -0700 Subject: [PATCH 115/136] Use "hadoop.version" property when specifying Hadoop YARN version too --- pom.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index 587b42c55a..b963782ee8 100644 --- a/pom.xml +++ b/pom.xml @@ -579,8 +579,8 @@ 2 - - 2.0.5-alpha + + 2.0.5-alpha @@ -607,7 +607,7 @@ org.apache.hadoop hadoop-client - ${yarn.version} + ${hadoop.version} asm @@ -638,7 +638,7 @@ org.apache.hadoop hadoop-yarn-api - ${yarn.version} + ${hadoop.version} asm @@ -669,7 +669,7 @@ org.apache.hadoop hadoop-yarn-common - ${yarn.version} + ${hadoop.version} asm @@ -700,7 +700,7 @@ org.apache.hadoop hadoop-yarn-client - ${yarn.version} + ${hadoop.version} asm From ff6f1b0500a9a75b61c225f3dc80d4026e98bcd5 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Wed, 21 Aug 2013 13:50:24 -0700 Subject: [PATCH 116/136] Synced sbt and maven builds --- core/pom.xml | 4 ++++ pom.xml | 16 +++++++++++----- project/SparkBuild.scala | 5 +---- repl/pom.xml | 6 ++++++ 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 6627a87de1..3d70a19584 100644 --- a/core/pom.xml +++ b/core/pom.xml 
@@ -52,6 +52,10 @@ com.google.guava guava + + com.google.code.findbugs + jsr305 + org.slf4j slf4j-api diff --git a/pom.xml b/pom.xml index fc0b314070..db696bad12 100644 --- a/pom.xml +++ b/pom.xml @@ -71,10 +71,10 @@ 1.5 2.9.3 0.12.1 - 2.0.3 + 2.0.5 1.7.2 1.2.17 - 1.2.1 + 1.0.4 64m @@ -157,12 +157,17 @@ org.eclipse.jetty jetty-server - 7.5.3.v20111011 + 7.6.8.v20121106 com.google.guava guava - 11.0.1 + 14.0.1 + + + com.google.code.findbugs + jsr305 + 1.3.9 org.slf4j @@ -318,7 +323,7 @@ com.novocode junit-interface - 0.8 + 0.9 test @@ -437,6 +442,7 @@ -unchecked -optimise + -deprecation -Xms64m diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 831bfbed78..78f9c4e981 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -134,7 +134,6 @@ object SparkBuild extends Build { */ libraryDependencies ++= Seq( - "io.netty" % "netty" % "3.5.3.Final", "org.eclipse.jetty" % "jetty-server" % "7.6.8.v20121106", "org.scalatest" %% "scalatest" % "1.9.1" % "test", "org.scalacheck" %% "scalacheck" % "1.10.0" % "test", @@ -165,17 +164,15 @@ object SparkBuild extends Build { name := "spark-core", resolvers ++= Seq( "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/", - "Spray Repository" at "http://repo.spray.cc/", "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/" ), libraryDependencies ++= Seq( "com.google.guava" % "guava" % "14.0.1", "com.google.code.findbugs" % "jsr305" % "1.3.9", - "log4j" % "log4j" % "1.2.16", + "log4j" % "log4j" % "1.2.17", "org.slf4j" % "slf4j-api" % slf4jVersion, "org.slf4j" % "slf4j-log4j12" % slf4jVersion, - "commons-daemon" % "commons-daemon" % "1.0.10", "com.ning" % "compress-lzf" % "0.8.4", "org.xerial.snappy" % "snappy-java" % "1.0.5", "org.ow2.asm" % "asm" % "4.0", diff --git a/repl/pom.xml b/repl/pom.xml index 5bc9a99c5c..f800664cff 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -48,6 +48,12 @@ ${project.version} runtime + + org.spark-project + spark-mllib + ${project.version} + runtime + org.eclipse.jetty jetty-server From 66e7a38a3229eeb6d980193048ebebcda1522acb Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 14:25:53 -0700 Subject: [PATCH 117/136] Allow build configuration to be set in conf/spark-env.sh --- conf/spark-env.sh.template | 10 +++++++++- make-distribution.sh | 5 +++++ sbt/sbt | 15 +++++++++------ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index c978db00d9..eefd1f86c5 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -19,6 +19,14 @@ # - SPARK_JAVA_OPTS, to set the jvm options for executor backend. Note: This is # only for node-specific options, whereas app-specific options should be set # in the application. -# Examples of node-speicic options : -Dspark.local.dir, GC related options. +# Examples of node-specific options : -Dspark.local.dir, GC related options. 
# Examples of app-specific options : -Dspark.serializer +# Hadoop version to build against +# export SPARK_HADOOP_VERSION=1.0.4 + +# Uncomment this when using a Hadoop version with YARN +# export SPARK_WITH_YARN=true + +# Extra arguments to pass to `java` when building with SBT +# export EXTRA_BUILD_ARGS="$EXTRA_BUILD_ARGS -Xmx8g" diff --git a/make-distribution.sh b/make-distribution.sh index 70aff418c7..846548617a 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -50,6 +50,11 @@ SPARK_HADOOP_VERSION=1.0.4 SPARK_WITH_YARN=false MAKE_TGZ=false +# Load configuration +if [ -f "$FWDIR/conf/spark-env.sh" ]; then + source "$FWDIR/conf/spark-env.sh" +fi + # Parse arguments while (( "$#" )); do case $1 in diff --git a/sbt/sbt b/sbt/sbt index 397895276c..a38a2985d1 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -17,12 +17,15 @@ # limitations under the License. # -EXTRA_ARGS="" -if [ "$MESOS_HOME" != "" ]; then - EXTRA_ARGS="-Djava.library.path=$MESOS_HOME/lib/java" -fi - export SPARK_HOME=$(cd "$(dirname $0)/.." 2>&1 >/dev/null ; pwd) export SPARK_TESTING=1 # To put test classes on classpath -java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=128m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" +if [ -f "$SPARK_HOME/conf/spark-env.sh" ]; then + source "$SPARK_HOME/conf/spark-env.sh" +fi + +if [ "$MESOS_HOME" != "" ]; then + EXTRA_BUILD_ARGS="$EXTRA_BUILD_ARGS -Djava.library.path=$MESOS_HOME/lib/java" +fi + +java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=128m $EXTRA_BUILD_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" From 6585f49841ada637b0811e0aadcf93132fff7001 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 14:51:56 -0700 Subject: [PATCH 118/136] Update build docs --- README.md | 46 +++++++++++++++++++++++++++++++++---- docs/building-with-maven.md | 35 +++++++++++++++++++--------- docs/running-on-yarn.md | 20 ++++++---------- 3 files changed, 73 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 1dd96a0a4a..1e388ff380 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Spark requires Scala 2.9.3 (Scala 2.10 is not yet supported). The project is built using Simple Build Tool (SBT), which is packaged with it. To build Spark and its example programs, run: - sbt/sbt package + sbt/sbt package assembly Spark also supports building using Maven. If you would like to build using Maven, see the [instructions for building Spark with Maven](http://spark-project.org/docs/latest/building-with-maven.html) @@ -43,10 +43,48 @@ locally with one thread, or "local[N]" to run locally with N threads. ## A Note About Hadoop Versions Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported -storage systems. Because the HDFS API has changed in different versions of +storage systems. Because the protocols have changed in different versions of Hadoop, you must build Spark against the same version that your cluster runs. -You can change the version by setting the `HADOOP_VERSION` variable at the top -of `project/SparkBuild.scala`, then rebuilding Spark. +You can change the version by setting the `SPARK_HADOOP_VERSION` environment +when building Spark. 
+ +For Apache Hadoop versions 1.x, 0.20.x, Cloudera CDH MRv1, and other Hadoop +versions without YARN, use: + + # Apache Hadoop 1.2.1 + $ SPARK_HADOOP_VERSION=1.2.1 sbt/sbt package assembly + + # Cloudera CDH 4.2.0 with MapReduce v1 + $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt package assembly + +For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions +with YARN, also set `SPARK_WITH_YARN=true`: + + # Apache Hadoop 2.0.5-alpha + $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_WITH_YARN=true sbt/sbt package assembly + + # Cloudera CDH 4.2.0 with MapReduce v2 + $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_WITH_YARN=true sbt/sbt package assembly + +For convenience, these variables may also be set through the `conf/spark-env.sh` file +described below. + +When developing a Spark application, specify the Hadoop version by adding the +"hadoop-client" artifact to your project's dependencies. For example, if you're +using Hadoop 0.23.9 and build your application using SBT, add this to +`libraryDependencies`: + + // "force()" is required because "0.23.9" is less than Spark's default of "1.0.4" + "org.apache.hadoop" % "hadoop-client" % "0.23.9" force() + +If your project is built with Maven, add this to your POM file's `` section: + + + org.apache.hadoop + hadoop-client + + [0.23.9] + ## Configuration diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md index 04cd79d039..d71d94fa63 100644 --- a/docs/building-with-maven.md +++ b/docs/building-with-maven.md @@ -8,22 +8,26 @@ title: Building Spark with Maven Building Spark using Maven Requires Maven 3 (the build process is tested with Maven 3.0.4) and Java 1.6 or newer. -Building with Maven requires that a Hadoop profile be specified explicitly at the command line, there is no default. There are two profiles to choose from, one for building for Hadoop 1 or Hadoop 2. +## Specifying the Hadoop version ## -for Hadoop 1 (using 0.20.205.0) use: +To enable support for HDFS and other Hadoop-supported storage systems, specify the exact Hadoop version by setting the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. - $ mvn -Phadoop1 clean install +For Apache Hadoop versions 1.x, 0.20.x, Cloudera CDH MRv1, and other Hadoop versions without YARN, use: + # Apache Hadoop 1.2.1 + $ mvn -Dhadoop.version=1.2.1 clean install -for Hadoop 2 (using 2.0.0-mr1-cdh4.1.1) use: + # Cloudera CDH 4.2.0 with MapReduce v1 + $ mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 clean install - $ mvn -Phadoop2 clean install +For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, enable the "hadoop2-yarn" profile: -It uses the scala-maven-plugin which supports incremental and continuous compilation. E.g. + # Apache Hadoop 2.0.5-alpha + $ mvn -Phadoop2-yarn -Dhadoop.version=2.0.5-alpha clean install - $ mvn -Phadoop2 scala:cc + # Cloudera CDH 4.2.0 with MapReduce v2 + $ mvn -Phadoop2-yarn -Dhadoop.version=2.0.0-cdh4.2.0 clean install -…should run continuous compilation (i.e. wait for changes). However, this has not been tested extensively. ## Spark Tests in Maven ## @@ -31,11 +35,11 @@ Tests are run by default via the scalatest-maven-plugin. With this you can do th Skip test execution (but not compilation): - $ mvn -DskipTests -Phadoop2 clean install + $ mvn -Dhadoop.version=... -DskipTests clean install To run a specific test suite: - $ mvn -Phadoop2 -Dsuites=spark.repl.ReplSuite test + $ mvn -Dhadoop.version=... 
-Dsuites=spark.repl.ReplSuite test ## Setting up JVM Memory Usage Via Maven ## @@ -53,6 +57,15 @@ To fix these, you can do the following: export MAVEN_OPTS="-Xmx1024m -XX:MaxPermSize=128M" +## Continuous Compilation ## + +We use the scala-maven-plugin which supports incremental and continuous compilation. E.g. + + $ mvn scala:cc + +…should run continuous compilation (i.e. wait for changes). However, this has not been tested extensively. + + ## Using With IntelliJ IDEA ## This setup works fine in IntelliJ IDEA 11.1.4. After opening the project via the pom.xml file in the project root folder, you only need to activate either the hadoop1 or hadoop2 profile in the "Maven Properties" popout. We have not tried Eclipse/Scala IDE with this. @@ -61,6 +74,6 @@ This setup works fine in IntelliJ IDEA 11.1.4. After opening the project via the It includes support for building a Debian package containing a 'fat-jar' which includes the repl, the examples and bagel. This can be created by specifying the deb profile: - $ mvn -Phadoop2,deb clean install + $ mvn -Pdeb clean install The debian package can then be found under repl/target. We added the short commit hash to the file name so that we can distinguish individual packages build for SNAPSHOT versions. diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 9c2cedfd88..6bada9bdd7 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -6,7 +6,7 @@ title: Launching Spark on YARN Experimental support for running over a [YARN (Hadoop NextGen)](http://hadoop.apache.org/docs/r2.0.2-alpha/hadoop-yarn/hadoop-yarn-site/YARN.html) cluster was added to Spark in version 0.6.0. This was merged into master as part of 0.7 effort. -To build spark core with YARN support, please use the hadoop2-yarn profile. +To build spark with YARN support, please use the hadoop2-yarn profile. Ex: mvn -Phadoop2-yarn clean install # Building spark core consolidated jar. @@ -15,18 +15,12 @@ We need a consolidated spark core jar (which bundles all the required dependenci This can be built either through sbt or via maven. - Building spark assembled jar via sbt. - It is a manual process of enabling it in project/SparkBuild.scala. -Please comment out the - HADOOP_VERSION, HADOOP_MAJOR_VERSION and HADOOP_YARN -variables before the line 'For Hadoop 2 YARN support' -Next, uncomment the subsequent 3 variable declaration lines (for these three variables) which enable hadoop yarn support. +Enable YARN support by setting `SPARK_WITH_YARN=true` when invoking sbt: -Assembly of the jar Ex: - - ./sbt/sbt clean assembly + SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_WITH_YARN=true ./sbt/sbt clean assembly The assembled jar would typically be something like : -`./core/target/spark-core-assembly-0.8.0-SNAPSHOT.jar` +`./yarn/target/spark-yarn-assembly-0.8.0-SNAPSHOT.jar` - Building spark assembled jar via Maven. @@ -34,16 +28,16 @@ The assembled jar would typically be something like : Something like this. Ex: - mvn -Phadoop2-yarn clean package -DskipTests=true + mvn -Phadoop2-yarn -Dhadoop.version=2.0.5-alpha clean package -DskipTests=true This will build the shaded (consolidated) jar. Typically something like : -`./repl-bin/target/spark-repl-bin--shaded-hadoop2-yarn.jar` +`./yarn/target/spark-yarn-bin--shaded.jar` # Preparations -- Building spark core assembled jar (see above). +- Building spark-yarn assembly (see above). - Your application code must be packaged into a separate JAR file. If you want to test out the YARN deployment mode, you can use the current Spark examples. 
A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt package`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. From 4d737b6d320993432f286e05b773135fc3334ce3 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 14:56:25 -0700 Subject: [PATCH 119/136] Example should make sense --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1e388ff380..9b2c1a9c4d 100644 --- a/README.md +++ b/README.md @@ -71,19 +71,19 @@ described below. When developing a Spark application, specify the Hadoop version by adding the "hadoop-client" artifact to your project's dependencies. For example, if you're -using Hadoop 0.23.9 and build your application using SBT, add this to +using Hadoop 1.0.1 and build your application using SBT, add this to `libraryDependencies`: - // "force()" is required because "0.23.9" is less than Spark's default of "1.0.4" - "org.apache.hadoop" % "hadoop-client" % "0.23.9" force() + // "force()" is required because "1.0.1" is less than Spark's default of "1.0.4" + "org.apache.hadoop" % "hadoop-client" % "1.0.1" force() If your project is built with Maven, add this to your POM file's `` section: org.apache.hadoop hadoop-client - - [0.23.9] + + [1.0.1] From c02585ea130045ef27e579172ac2acc71bc8da63 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 21 Aug 2013 15:45:45 -0700 Subject: [PATCH 120/136] Make initial connection failure message less daunting. Right now it seems like something has gone wrong when this message is printed out. Instead, this is a normal condition. So I changed the message a bit. 
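The surrounding retry loop is what makes this a pure messaging change: a freshly launched instance often rejects the first SSH attempt, the script waits 30 seconds and tries again, and only the last failure is treated as an error. Below is a minimal sketch of that pattern, not the ec2 script itself (which is Python); the names, the three-attempt limit and the shortened wait are illustrative only:

    object RetryDemo {
      // Sketch of the ssh() retry behaviour: tolerate the first few failures with a
      // calm message and a pause, then rethrow once the attempts are exhausted.
      def withRetries[T](attempts: Int, waitSeconds: Int)(op: => T): T = {
        var tries = 0
        while (true) {
          try {
            return op
          } catch {
            case e: Exception =>
              if (tries >= attempts - 1) throw e               // out of attempts: fail loudly
              println(s"Couldn't connect ($e), waiting $waitSeconds seconds")
              Thread.sleep(waitSeconds * 1000L)
              tries += 1
          }
        }
        sys.error("unreachable")
      }

      def main(args: Array[String]): Unit = {
        var calls = 0
        val result = withRetries(attempts = 3, waitSeconds = 1) {
          calls += 1
          if (calls < 3) throw new RuntimeException("connection refused")
          "connected"
        }
        println(result)                                        // "connected", after two tolerated failures
      }
    }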
--- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 30253a94b8..932e70db96 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -562,7 +562,7 @@ def ssh(host, opts, command): except subprocess.CalledProcessError as e: if (tries > 2): raise e - print "Error connecting to host {0}, sleeping 30".format(e) + print "Couldn't connect to host {0}, waiting 30 seconds".format(e) time.sleep(30) tries = tries + 1 From 76077bf9f4b726699ba9e59cdfa9c4361df4ea92 Mon Sep 17 00:00:00 2001 From: Andre Schumacher Date: Tue, 20 Aug 2013 13:22:06 -0700 Subject: [PATCH 121/136] Implementing SPARK-838: Add DoubleRDDFunctions methods to PySpark --- python/pyspark/rdd.py | 60 ++++++++++++++++++- python/pyspark/statcounter.py | 109 ++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 python/pyspark/statcounter.py diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 99f5967a8e..1e9b3bb5c0 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -31,6 +31,7 @@ from pyspark.serializers import batched, Batch, dump_pickle, load_pickle, \ read_from_pickle_file from pyspark.join import python_join, python_left_outer_join, \ python_right_outer_join, python_cogroup +from pyspark.statcounter import StatCounter from py4j.java_collections import ListConverter, MapConverter @@ -357,6 +358,63 @@ class RDD(object): 3 """ return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum() + + def stats(self): + """ + Return a L{StatCounter} object that captures the mean, variance + and count of the RDD's elements in one operation. + """ + def redFunc(left_counter, right_counter): + return left_counter.mergeStats(right_counter) + + return self.mapPartitions(lambda i: [StatCounter(i)]).reduce(redFunc) + + def mean(self): + """ + Compute the mean of this RDD's elements. + + >>> sc.parallelize([1, 2, 3]).mean() + 2.0 + """ + return self.stats().mean() + + def variance(self): + """ + Compute the variance of this RDD's elements. + + >>> sc.parallelize([1, 2, 3]).variance() + 0.666... + """ + return self.stats().variance() + + def stdev(self): + """ + Compute the standard deviation of this RDD's elements. + + >>> sc.parallelize([1, 2, 3]).stdev() + 0.816... + """ + return self.stats().stdev() + + def sampleStdev(self): + """ + Compute the sample standard deviation of this RDD's elements (which corrects for bias in + estimating the standard deviation by dividing by N-1 instead of N). + + >>> sc.parallelize([1, 2, 3]).sampleStdev() + 1.0 + """ + return self.stats().sampleStdev() + + def sampleVariance(self): + """ + Compute the sample variance of this RDD's elements (which corrects for bias in + estimating the variance by dividing by N-1 instead of N). 
+ + >>> sc.parallelize([1, 2, 3]).sampleVariance() + 1.0 + """ + return self.stats().sampleVariance() def countByValue(self): """ @@ -777,7 +835,7 @@ def _test(): # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs) + (failure_count, test_count) = doctest.testmod(globs=globs,optionflags=doctest.ELLIPSIS) globs['sc'].stop() if failure_count: exit(-1) diff --git a/python/pyspark/statcounter.py b/python/pyspark/statcounter.py new file mode 100644 index 0000000000..8e1cbd4ad9 --- /dev/null +++ b/python/pyspark/statcounter.py @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is ported from spark/util/StatCounter.scala + +import copy +import math + +class StatCounter(object): + + def __init__(self, values=[]): + self.n = 0L # Running count of our values + self.mu = 0.0 # Running mean of our values + self.m2 = 0.0 # Running variance numerator (sum of (x - mean)^2) + + for v in values: + self.merge(v) + + # Add a value into this StatCounter, updating the internal statistics. + def merge(self, value): + delta = value - self.mu + self.n += 1 + self.mu += delta / self.n + self.m2 += delta * (value - self.mu) + return self + + # Merge another StatCounter into this one, adding up the internal statistics. + def mergeStats(self, other): + if not isinstance(other, StatCounter): + raise Exception("Can only merge Statcounters!") + + if other is self: # reference equality holds + self.merge(copy.deepcopy(other)) # Avoid overwriting fields in a weird order + else: + if self.n == 0: + self.mu = other.mu + self.m2 = other.m2 + self.n = other.n + elif other.n != 0: + delta = other.mu - self.mu + if other.n * 10 < self.n: + self.mu = self.mu + (delta * other.n) / (self.n + other.n) + elif self.n * 10 < other.n: + self.mu = other.mu - (delta * self.n) / (self.n + other.n) + else: + self.mu = (self.mu * self.n + other.mu * other.n) / (self.n + other.n) + + self.m2 += other.m2 + (delta * delta * self.n * other.n) / (self.n + other.n) + self.n += other.n + return self + + # Clone this StatCounter + def copy(self): + return copy.deepcopy(self) + + def count(self): + return self.n + + def mean(self): + return self.mu + + def sum(self): + return self.n * self.mu + + # Return the variance of the values. + def variance(self): + if self.n == 0: + return float('nan') + else: + return self.m2 / self.n + + # + # Return the sample variance, which corrects for bias in estimating the variance by dividing + # by N-1 instead of N. + # + def sampleVariance(self): + if self.n <= 1: + return float('nan') + else: + return self.m2 / (self.n - 1) + + # Return the standard deviation of the values. 
+ def stdev(self): + return math.sqrt(self.variance()) + + # + # Return the sample standard deviation of the values, which corrects for bias in estimating the + # variance by dividing by N-1 instead of N. + # + def sampleStdev(self): + return math.sqrt(self.sampleVariance()) + + def __repr__(self): + return "(count: %s, mean: %s, stdev: %s)" % (self.count(), self.mean(), self.stdev()) + From f9cc1fbf272606ce0b679936ef38429b20916cc1 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 17:12:03 -0700 Subject: [PATCH 122/136] Remove references to unsupported Hadoop versions --- README.md | 2 +- docs/building-with-maven.md | 2 +- project/SparkBuild.scala | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 9b2c1a9c4d..8502410c52 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Hadoop, you must build Spark against the same version that your cluster runs. You can change the version by setting the `SPARK_HADOOP_VERSION` environment when building Spark. -For Apache Hadoop versions 1.x, 0.20.x, Cloudera CDH MRv1, and other Hadoop +For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop versions without YARN, use: # Apache Hadoop 1.2.1 diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md index d71d94fa63..a9f2cb8a7a 100644 --- a/docs/building-with-maven.md +++ b/docs/building-with-maven.md @@ -12,7 +12,7 @@ Building Spark using Maven Requires Maven 3 (the build process is tested with Ma To enable support for HDFS and other Hadoop-supported storage systems, specify the exact Hadoop version by setting the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. -For Apache Hadoop versions 1.x, 0.20.x, Cloudera CDH MRv1, and other Hadoop versions without YARN, use: +For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop versions without YARN, use: # Apache Hadoop 1.2.1 $ mvn -Dhadoop.version=1.2.1 clean install diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 81080741ca..cc09bf7dd2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -24,10 +24,9 @@ import AssemblyKeys._ //import com.jsuereth.pgp.sbtplugin.PgpKeys._ object SparkBuild extends Build { - // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or - // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. - // Note that these variables can be set through the environment variables - // SPARK_HADOOP_VERSION and SPARK_WITH_YARN. + // Hadoop version to build against. For example, "1.0.4" for Apache releases, or + // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set + // through the environment variables SPARK_HADOOP_VERSION and SPARK_WITH_YARN. val DEFAULT_HADOOP_VERSION = "1.0.4" val DEFAULT_WITH_YARN = false From 54e9379de291c3d5e5a9749cfc022a17ff34852c Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 21:13:34 -0700 Subject: [PATCH 123/136] Revert "Allow build configuration to be set in conf/spark-env.sh" This reverts commit 66e7a38a3229eeb6d980193048ebebcda1522acb. --- conf/spark-env.sh.template | 10 +--------- make-distribution.sh | 5 ----- sbt/sbt | 15 ++++++--------- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index eefd1f86c5..c978db00d9 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -19,14 +19,6 @@ # - SPARK_JAVA_OPTS, to set the jvm options for executor backend. 
Note: This is # only for node-specific options, whereas app-specific options should be set # in the application. -# Examples of node-specific options : -Dspark.local.dir, GC related options. +# Examples of node-speicic options : -Dspark.local.dir, GC related options. # Examples of app-specific options : -Dspark.serializer -# Hadoop version to build against -# export SPARK_HADOOP_VERSION=1.0.4 - -# Uncomment this when using a Hadoop version with YARN -# export SPARK_WITH_YARN=true - -# Extra arguments to pass to `java` when building with SBT -# export EXTRA_BUILD_ARGS="$EXTRA_BUILD_ARGS -Xmx8g" diff --git a/make-distribution.sh b/make-distribution.sh index 846548617a..70aff418c7 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -50,11 +50,6 @@ SPARK_HADOOP_VERSION=1.0.4 SPARK_WITH_YARN=false MAKE_TGZ=false -# Load configuration -if [ -f "$FWDIR/conf/spark-env.sh" ]; then - source "$FWDIR/conf/spark-env.sh" -fi - # Parse arguments while (( "$#" )); do case $1 in diff --git a/sbt/sbt b/sbt/sbt index a38a2985d1..397895276c 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -17,15 +17,12 @@ # limitations under the License. # +EXTRA_ARGS="" +if [ "$MESOS_HOME" != "" ]; then + EXTRA_ARGS="-Djava.library.path=$MESOS_HOME/lib/java" +fi + export SPARK_HOME=$(cd "$(dirname $0)/.." 2>&1 >/dev/null ; pwd) export SPARK_TESTING=1 # To put test classes on classpath -if [ -f "$SPARK_HOME/conf/spark-env.sh" ]; then - source "$SPARK_HOME/conf/spark-env.sh" -fi - -if [ "$MESOS_HOME" != "" ]; then - EXTRA_BUILD_ARGS="$EXTRA_BUILD_ARGS -Djava.library.path=$MESOS_HOME/lib/java" -fi - -java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=128m $EXTRA_BUILD_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" +java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=128m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" From 0087b43e9cddc726f661e1e047e63390d5d9b419 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 21:15:00 -0700 Subject: [PATCH 124/136] Use Hadoop 1.2.1 in application example --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8502410c52..e5f527b84a 100644 --- a/README.md +++ b/README.md @@ -71,19 +71,18 @@ described below. When developing a Spark application, specify the Hadoop version by adding the "hadoop-client" artifact to your project's dependencies. For example, if you're -using Hadoop 1.0.1 and build your application using SBT, add this to +using Hadoop 1.0.1 and build your application using SBT, add this entry to `libraryDependencies`: - // "force()" is required because "1.0.1" is less than Spark's default of "1.0.4" - "org.apache.hadoop" % "hadoop-client" % "1.0.1" force() + "org.apache.hadoop" % "hadoop-client" % "1.2.1" If your project is built with Maven, add this to your POM file's `` section: org.apache.hadoop hadoop-client - - [1.0.1] + + [1.2.1] From 9a90667d098487b205ab8fc63c824843e30c739e Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Wed, 21 Aug 2013 21:15:28 -0700 Subject: [PATCH 125/136] Increase ReservedCodeCacheSize to 256m --- sbt/sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbt/sbt b/sbt/sbt index 397895276c..a79f60d087 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -25,4 +25,4 @@ fi export SPARK_HOME=$(cd "$(dirname $0)/.." 
2>&1 >/dev/null ; pwd) export SPARK_TESTING=1 # To put test classes on classpath -java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=128m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" +java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" From 281b6c5f28bd7377fb89097f557f180a7fdf5b50 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 22 Aug 2013 13:22:29 -0700 Subject: [PATCH 126/136] Re-add removed dependency on 'commons-daemon' Fixes SBT build under Hadoop 0.23.9 and 2.0.4 --- project/SparkBuild.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index ae35de71cd..2a80f9c946 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -172,6 +172,7 @@ object SparkBuild extends Build { "log4j" % "log4j" % "1.2.17", "org.slf4j" % "slf4j-api" % slf4jVersion, "org.slf4j" % "slf4j-log4j12" % slf4jVersion, + "commons-daemon" % "commons-daemon" % "1.0.10", // workaround for bug HADOOP-9407 "com.ning" % "compress-lzf" % "0.8.4", "org.xerial.snappy" % "snappy-java" % "1.0.5", "org.ow2.asm" % "asm" % "4.0", From 215c13dd41d8500835ef00624a0b4ced2253554e Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 22 Aug 2013 16:13:46 -0700 Subject: [PATCH 127/136] Fix code style and a nondeterministic RDD issue in ALS --- .../spark/mllib/recommendation/ALS.scala | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala index 9097f46db9..dbfbf59975 100644 --- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala @@ -123,18 +123,27 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l val (userInLinks, userOutLinks) = makeLinkRDDs(numBlocks, ratingsByUserBlock) val (productInLinks, productOutLinks) = makeLinkRDDs(numBlocks, ratingsByProductBlock) - // Initialize user and product factors randomly - var users = userOutLinks.mapPartitions {itr => - val rand = new Random() - itr.map({case (x, y) => - (x, y.elementIds.map(u => randomFactor(rank, rand))) - }) + // Initialize user and product factors randomly, but use a deterministic seed for each partition + // so that fault recovery works + val seedGen = new Random() + val seed1 = seedGen.nextInt() + val seed2 = seedGen.nextInt() + // Hash an integer to propagate random bits at all positions, similar to java.util.HashTable + def hash(x: Int): Int = { + val r = x ^ (x >>> 20) ^ (x >>> 12) + r ^ (r >>> 7) ^ (r >>> 4) } - var products = productOutLinks.mapPartitions {itr => - val rand = new Random() - itr.map({case (x, y) => - (x, y.elementIds.map(u => randomFactor(rank, rand))) - }) + var users = userOutLinks.mapPartitionsWithIndex { (index, itr) => + val rand = new Random(hash(seed1 ^ index)) + itr.map { case (x, y) => + (x, y.elementIds.map(_ => randomFactor(rank, rand))) + } + } + var products = productOutLinks.mapPartitionsWithIndex { (index, itr) => + val rand = new Random(hash(seed2 ^ index)) + itr.map { case (x, y) => + (x, y.elementIds.map(_ => randomFactor(rank, rand))) + } } for (iter <- 0 until iterations) { From 39a1d58da484165790c61a924550b58837997f0d Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 15 Jul 2013 17:25:51 +0530 Subject: [PATCH 128/136] Improved documentation for spark custom receiver --- docs/streaming-custom-receivers.md | 53 
+++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/docs/streaming-custom-receivers.md b/docs/streaming-custom-receivers.md index 5476c00d02..dfa343bf94 100644 --- a/docs/streaming-custom-receivers.md +++ b/docs/streaming-custom-receivers.md @@ -7,11 +7,46 @@ A "Spark Streaming" receiver can be a simple network stream, streams of messages This guide shows the programming model and features by walking through a simple sample receiver and corresponding Spark Streaming application. - -## A quick and naive walk-through - ### Write a simple receiver +This starts with implementing [NetworkReceiver](#References) + +Following is a simple socket text-stream receiver. + +{% highlight scala %} + + class SocketTextStreamReceiver(host: String, + port: Int + ) extends NetworkReceiver[String] { + + protected lazy val blocksGenerator: BlockGenerator = + new BlockGenerator(StorageLevel.MEMORY_ONLY_SER_2) + + protected def onStart() = { + blocksGenerator.start() + val socket = new Socket(host, port) + val dataInputStream = new BufferedReader(new InputStreamReader(socket.getInputStream(), "UTF-8")) + var data: String = dataInputStream.readLine() + while (data != null) { + blocksGenerator += data + data = dataInputStream.readLine() + } + } + + protected def onStop() { + blocksGenerator.stop() + } + + } + +{% endhighlight %} + + +All we did here is extended NetworkReceiver and called blockGenerator's API method (i.e. +=) to push our blocks of data. Please refer to scala-docs of NetworkReceiver for more details. + + +### An Actor as Receiver. + This starts with implementing [Actor](#References) Following is a simple socket text-stream receiver, which is appearently overly simplified using Akka's socket.io api. @@ -46,7 +81,16 @@ All we did here is mixed in trait Receiver and called pushBlock api method to pu {% endhighlight %} -* Plug-in the actor configuration into the spark streaming context and create a DStream. +* Plug-in the custom receiver into the spark streaming context and create a DStream. + +{% highlight scala %} + + val lines = ssc.networkStream[String](new SocketTextStreamReceiver( + "localhost", 8445)) + +{% endhighlight %} + +* OR Plug-in the actor as receiver into the spark streaming context and create a DStream. {% highlight scala %} @@ -99,3 +143,4 @@ _A more comprehensive example is provided in the spark streaming examples_ ## References 1.[Akka Actor documentation](http://doc.akka.io/docs/akka/2.0.5/scala/actors.html) +2.[NetworkReceiver](http://spark-project.org/docs/latest/api/streaming/index.html#spark.streaming.dstream.NetworkReceiver) From 3049415e245eefac0593a3e33ee7c8a845eeb800 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Fri, 23 Aug 2013 09:40:28 +0530 Subject: [PATCH 129/136] Corrections in documentation comment --- .../scala/spark/streaming/dstream/NetworkInputDStream.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala b/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala index 122a529bb7..d98d73a5a8 100644 --- a/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala +++ b/streaming/src/main/scala/spark/streaming/dstream/NetworkInputDStream.scala @@ -128,8 +128,8 @@ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Log } /** - * Stops the receiver and reports to exception to the tracker. 
- * This should be called whenever an exception has happened on any thread + * Stops the receiver and reports exception to the tracker. + * This should be called whenever an exception is to be handled on any thread * of the receiver. */ protected def stopOnError(e: Exception) { @@ -185,7 +185,7 @@ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Log } /** - * Batches objects created by a [[spark.streaming.NetworkReceiver]] and puts them into + * Batches objects created by a [[spark.streaming.dstream.NetworkReceiver]] and puts them into * appropriately named blocks at regular intervals. This class starts two threads, * one to periodically start a new batch and prepare the previous batch of as a block, * the other to push the blocks into the block manager. From 2bc348e92c458ea36872ac43a2583370d1f3eb41 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Fri, 23 Aug 2013 09:41:32 +0530 Subject: [PATCH 130/136] Linking custom receiver guide --- docs/streaming-programming-guide.md | 3 +++ .../src/main/scala/spark/streaming/StreamingContext.scala | 2 ++ .../main/scala/spark/streaming/receivers/ActorReceiver.scala | 2 ++ 3 files changed, 7 insertions(+) diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 8cd1b0cd66..a74c17bdb7 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -301,6 +301,9 @@ dstream.checkpoint(checkpointInterval) // checkpointInterval must be a multiple For DStreams that must be checkpointed (that is, DStreams created by `updateStateByKey` and `reduceByKeyAndWindow` with inverse function), the checkpoint interval of the DStream is by default set to a multiple of the DStream's sliding interval such that its at least 10 seconds. +## Customizing Receiver +Spark comes with a built in support for most common usage scenarios where input stream source can be either a network socket stream to support for a few message queues. Apart from that it is also possible to supply your own custom receiver via a convenient API. Find more details at [Custom Receiver Guide](streaming-custom-receivers.html) + # Performance Tuning Getting the best performance of a Spark Streaming application on a cluster requires a bit of tuning. This section explains a number of the parameters and configurations that can tuned to improve the performance of you application. At a high level, you need to consider two things:
    diff --git a/streaming/src/main/scala/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/spark/streaming/StreamingContext.scala index 36b841af8f..9abe55ecce 100644 --- a/streaming/src/main/scala/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/StreamingContext.scala @@ -166,6 +166,7 @@ class StreamingContext private ( /** * Create an input stream with any arbitrary user implemented network receiver. + * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html * @param receiver Custom implementation of NetworkReceiver */ def networkStream[T: ClassManifest]( @@ -178,6 +179,7 @@ class StreamingContext private ( /** * Create an input stream with any arbitrary user implemented actor receiver. + * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html * @param props Props object defining creation of the actor * @param name Name of the actor * @param storageLevel RDD storage level. Defaults to memory-only. diff --git a/streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala b/streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala index 036c95a860..3eca397f1a 100644 --- a/streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala +++ b/streaming/src/main/scala/spark/streaming/receivers/ActorReceiver.scala @@ -28,6 +28,8 @@ object ReceiverSupervisorStrategy { * A receiver trait to be mixed in with your Actor to gain access to * pushBlock API. * + * Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html + * * @example {{{ * class MyActor extends Actor with Receiver{ * def receive { From d7f18e3d279bf7f5a56db434f3abf77a5ccd9971 Mon Sep 17 00:00:00 2001 From: Ian Buss Date: Fri, 23 Aug 2013 09:40:16 +0100 Subject: [PATCH 131/136] Pass SBT_OPTS environment through to sbt_launcher --- sbt/sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbt/sbt b/sbt/sbt index a79f60d087..2227bc4696 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -25,4 +25,4 @@ fi export SPARK_HOME=$(cd "$(dirname $0)/.." 
2>&1 >/dev/null ; pwd) export SPARK_TESTING=1 # To put test classes on classpath -java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" +java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m $EXTRA_ARGS $SBT_OPTS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@" From b7f9e6374ae89568b5b7298d89825eaf0b33cc15 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Fri, 23 Aug 2013 10:26:37 -0700 Subject: [PATCH 132/136] Fix SBT generation of IDE project files --- project/SparkBuild.scala | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 2a80f9c946..fbeae27707 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -57,14 +57,14 @@ object SparkBuild extends Build { // Allows build configuration to be set through environment variables lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION) - lazy val isYarnMode = scala.util.Properties.envOrNone("SPARK_WITH_YARN") match { + lazy val isYarnEnabled = scala.util.Properties.envOrNone("SPARK_WITH_YARN") match { case None => DEFAULT_WITH_YARN case Some(v) => v.toBoolean } // Conditionally include the yarn sub-project - lazy val maybeYarn = if(isYarnMode) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]() - lazy val maybeYarnRef = if(isYarnMode) Seq[ProjectReference](yarn) else Seq[ProjectReference]() + lazy val maybeYarn = if(isYarnEnabled) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]() + lazy val maybeYarnRef = if(isYarnEnabled) Seq[ProjectReference](yarn) else Seq[ProjectReference]() lazy val allProjects = Seq[ProjectReference](core, repl, examples, bagel, streaming, mllib, tools) ++ maybeYarnRef def sharedSettings = Defaults.defaultSettings ++ Seq( @@ -253,7 +253,14 @@ object SparkBuild extends Build { ) ++ assemblySettings ++ extraAssemblySettings def yarnSettings = sharedSettings ++ Seq( - name := "spark-yarn", + name := "spark-yarn" + ) ++ extraYarnSettings ++ assemblySettings ++ extraAssemblySettings + + // Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain + // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN). + def extraYarnSettings = if(isYarnEnabled) yarnEnabledSettings else Seq() + + def yarnEnabledSettings = Seq( libraryDependencies ++= Seq( // Exclude rule required for all ? 
"org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), @@ -261,7 +268,7 @@ object SparkBuild extends Build { "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm), "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm) ) - ) ++ assemblySettings ++ extraAssemblySettings + ) def extraAssemblySettings() = Seq(test in assembly := {}) ++ Seq( mergeStrategy in assembly := { From a9db1b7b6eb030feb7beee017d2eca402b73c67c Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Fri, 23 Aug 2013 10:27:18 -0700 Subject: [PATCH 133/136] Upgrade SBT IDE project generators --- project/plugins.sbt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index 1b0f879b94..783b40d4f5 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -6,9 +6,9 @@ resolvers += "Spray Repository" at "http://repo.spray.cc/" addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.5") -addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.1.1") +addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0") -addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.2.0") +addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.5.1") // For Sonatype publishing //resolvers += Resolver.url("sbt-plugin-releases", new URL("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases/"))(Resolver.ivyStylePatterns) From 2cfe52ef55d87172109db4ac81c1f97300c47278 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sat, 24 Aug 2013 15:16:53 -0700 Subject: [PATCH 134/136] Version bump for ec2 docs --- docs/ec2-scripts.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md index bd787e0e46..da0c06e2a6 100644 --- a/docs/ec2-scripts.md +++ b/docs/ec2-scripts.md @@ -74,7 +74,7 @@ another. bidding for the given maximum price (in dollars). - `--spark-version=VERSION` will pre-load the cluster with the specified version of Spark. VERSION can be a version number - (e.g. "0.7.2") or a specific git hash. By default, a recent + (e.g. "0.7.3") or a specific git hash. By default, a recent version will be used. - If one of your launches fails due to e.g. not having the right permissions on your private key file, you can run `launch` with the From 9db1e50344cc49810c8549b5b3022038c64e4dfa Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 26 Aug 2013 11:05:14 -0700 Subject: [PATCH 135/136] Revert "Merge pull request #841 from rxin/json" This reverts commit 1fb1b0992838c8cdd57eec45793e67a0490f1a52, reversing changes made to c69c48947d5102c81a9425cb380d861c3903685c. 
--- core/pom.xml | 4 + .../scala/spark/deploy/JsonProtocol.scala | 108 +++++++++--------- .../deploy/master/ui/ApplicationPage.scala | 5 +- .../spark/deploy/master/ui/IndexPage.scala | 5 +- .../spark/deploy/worker/ui/IndexPage.scala | 5 +- core/src/main/scala/spark/ui/JettyUtils.scala | 7 +- pom.xml | 5 + project/SparkBuild.scala | 1 + 8 files changed, 76 insertions(+), 64 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 3d70a19584..53696367e9 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -106,6 +106,10 @@ org.scala-lang scala-library + + net.liftweb + lift-json_2.9.2 + it.unimi.dsi fastutil diff --git a/core/src/main/scala/spark/deploy/JsonProtocol.scala b/core/src/main/scala/spark/deploy/JsonProtocol.scala index 6b71b953dd..bd1db7c294 100644 --- a/core/src/main/scala/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/spark/deploy/JsonProtocol.scala @@ -17,7 +17,7 @@ package spark.deploy -import scala.util.parsing.json.{JSONArray, JSONObject, JSONType} +import net.liftweb.json.JsonDSL._ import spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import spark.deploy.master.{ApplicationInfo, WorkerInfo} @@ -25,63 +25,61 @@ import spark.deploy.worker.ExecutorRunner private[spark] object JsonProtocol { + def writeWorkerInfo(obj: WorkerInfo) = { + ("id" -> obj.id) ~ + ("host" -> obj.host) ~ + ("port" -> obj.port) ~ + ("webuiaddress" -> obj.webUiAddress) ~ + ("cores" -> obj.cores) ~ + ("coresused" -> obj.coresUsed) ~ + ("memory" -> obj.memory) ~ + ("memoryused" -> obj.memoryUsed) + } - def writeWorkerInfo(obj: WorkerInfo): JSONType = JSONObject(Map( - "id" -> obj.id, - "host" -> obj.host, - "port" -> obj.port, - "webuiaddress" -> obj.webUiAddress, - "cores" -> obj.cores, - "coresused" -> obj.coresUsed, - "memory" -> obj.memory, - "memoryused" -> obj.memoryUsed, - "state" -> obj.state.toString - )) + def writeApplicationInfo(obj: ApplicationInfo) = { + ("starttime" -> obj.startTime) ~ + ("id" -> obj.id) ~ + ("name" -> obj.desc.name) ~ + ("cores" -> obj.desc.maxCores) ~ + ("user" -> obj.desc.user) ~ + ("memoryperslave" -> obj.desc.memoryPerSlave) ~ + ("submitdate" -> obj.submitDate.toString) + } - def writeApplicationInfo(obj: ApplicationInfo): JSONType = JSONObject(Map( - "starttime" -> obj.startTime, - "id" -> obj.id, - "name" -> obj.desc.name, - "cores" -> obj.desc.maxCores, - "user" -> obj.desc.user, - "memoryperslave" -> obj.desc.memoryPerSlave, - "submitdate" -> obj.submitDate.toString - )) + def writeApplicationDescription(obj: ApplicationDescription) = { + ("name" -> obj.name) ~ + ("cores" -> obj.maxCores) ~ + ("memoryperslave" -> obj.memoryPerSlave) ~ + ("user" -> obj.user) + } - def writeApplicationDescription(obj: ApplicationDescription): JSONType = JSONObject(Map( - "name" -> obj.name, - "cores" -> obj.maxCores, - "memoryperslave" -> obj.memoryPerSlave, - "user" -> obj.user - )) + def writeExecutorRunner(obj: ExecutorRunner) = { + ("id" -> obj.execId) ~ + ("memory" -> obj.memory) ~ + ("appid" -> obj.appId) ~ + ("appdesc" -> writeApplicationDescription(obj.appDesc)) + } - def writeExecutorRunner(obj: ExecutorRunner): JSONType = JSONObject(Map( - "id" -> obj.execId, - "memory" -> obj.memory, - "appid" -> obj.appId, - "appdesc" -> writeApplicationDescription(obj.appDesc) - )) + def writeMasterState(obj: MasterStateResponse) = { + ("url" -> ("spark://" + obj.uri)) ~ + ("workers" -> obj.workers.toList.map(writeWorkerInfo)) ~ + ("cores" -> obj.workers.map(_.cores).sum) ~ + ("coresused" -> obj.workers.map(_.coresUsed).sum) ~ + ("memory" -> 
obj.workers.map(_.memory).sum) ~ + ("memoryused" -> obj.workers.map(_.memoryUsed).sum) ~ + ("activeapps" -> obj.activeApps.toList.map(writeApplicationInfo)) ~ + ("completedapps" -> obj.completedApps.toList.map(writeApplicationInfo)) + } - def writeMasterState(obj: MasterStateResponse): JSONType = JSONObject(Map( - "url" -> ("spark://" + obj.uri), - "workers" -> obj.workers.toList.map(writeWorkerInfo), - "cores" -> obj.workers.map(_.cores).sum, - "coresused" -> obj.workers.map(_.coresUsed).sum, - "memory" -> obj.workers.map(_.memory).sum, - "memoryused" -> obj.workers.map(_.memoryUsed).sum, - "activeapps" -> JSONArray(obj.activeApps.toList.map(writeApplicationInfo)), - "completedapps" -> JSONArray(obj.completedApps.toList.map(writeApplicationInfo)) - )) - - def writeWorkerState(obj: WorkerStateResponse): JSONType = JSONObject(Map( - "id" -> obj.workerId, - "masterurl" -> obj.masterUrl, - "masterwebuiurl" -> obj.masterWebUiUrl, - "cores" -> obj.cores, - "coresused" -> obj.coresUsed, - "memory" -> obj.memory, - "memoryused" -> obj.memoryUsed, - "executors" -> JSONArray(obj.executors.toList.map(writeExecutorRunner)), - "finishedexecutors" -> JSONArray(obj.finishedExecutors.toList.map(writeExecutorRunner)) - )) + def writeWorkerState(obj: WorkerStateResponse) = { + ("id" -> obj.workerId) ~ + ("masterurl" -> obj.masterUrl) ~ + ("masterwebuiurl" -> obj.masterWebUiUrl) ~ + ("cores" -> obj.cores) ~ + ("coresused" -> obj.coresUsed) ~ + ("memory" -> obj.memory) ~ + ("memoryused" -> obj.memoryUsed) ~ + ("executors" -> obj.executors.toList.map(writeExecutorRunner)) ~ + ("finishedexecutors" -> obj.finishedExecutors.toList.map(writeExecutorRunner)) + } } diff --git a/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala index 494a9b914d..405a1ec3a6 100644 --- a/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala @@ -17,7 +17,6 @@ package spark.deploy.master.ui -import scala.util.parsing.json.JSONType import scala.xml.Node import akka.dispatch.Await @@ -26,6 +25,8 @@ import akka.util.duration._ import javax.servlet.http.HttpServletRequest +import net.liftweb.json.JsonAST.JValue + import spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import spark.deploy.JsonProtocol import spark.deploy.master.ExecutorInfo @@ -36,7 +37,7 @@ private[spark] class ApplicationPage(parent: MasterWebUI) { implicit val timeout = parent.timeout /** Executor details for a particular application */ - def renderJson(request: HttpServletRequest): JSONType = { + def renderJson(request: HttpServletRequest): JValue = { val appId = request.getParameter("appId") val stateFuture = (master ? 
RequestMasterState)(timeout).mapTo[MasterStateResponse] val state = Await.result(stateFuture, 30 seconds) diff --git a/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala b/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala index 28e421e3bc..2000211b98 100644 --- a/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala +++ b/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala @@ -19,13 +19,14 @@ package spark.deploy.master.ui import javax.servlet.http.HttpServletRequest -import scala.util.parsing.json.JSONType import scala.xml.Node import akka.dispatch.Await import akka.pattern.ask import akka.util.duration._ +import net.liftweb.json.JsonAST.JValue + import spark.Utils import spark.deploy.DeployWebUI import spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} @@ -37,7 +38,7 @@ private[spark] class IndexPage(parent: MasterWebUI) { val master = parent.masterActorRef implicit val timeout = parent.timeout - def renderJson(request: HttpServletRequest): JSONType = { + def renderJson(request: HttpServletRequest): JValue = { val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] val state = Await.result(stateFuture, 30 seconds) JsonProtocol.writeMasterState(state) diff --git a/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala b/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala index 02993d58a0..b67059068b 100644 --- a/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala +++ b/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala @@ -19,13 +19,14 @@ package spark.deploy.worker.ui import javax.servlet.http.HttpServletRequest -import scala.util.parsing.json.JSONType import scala.xml.Node import akka.dispatch.Await import akka.pattern.ask import akka.util.duration._ +import net.liftweb.json.JsonAST.JValue + import spark.Utils import spark.deploy.JsonProtocol import spark.deploy.DeployMessages.{RequestWorkerState, WorkerStateResponse} @@ -38,7 +39,7 @@ private[spark] class IndexPage(parent: WorkerWebUI) { val worker = parent.worker val timeout = parent.timeout - def renderJson(request: HttpServletRequest): JSONType = { + def renderJson(request: HttpServletRequest): JValue = { val stateFuture = (workerActor ? 
RequestWorkerState)(timeout).mapTo[WorkerStateResponse] val workerState = Await.result(stateFuture, 30 seconds) JsonProtocol.writeWorkerState(workerState) diff --git a/core/src/main/scala/spark/ui/JettyUtils.scala b/core/src/main/scala/spark/ui/JettyUtils.scala index ba58f35729..f66fe39905 100644 --- a/core/src/main/scala/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/spark/ui/JettyUtils.scala @@ -21,9 +21,10 @@ import javax.servlet.http.{HttpServletResponse, HttpServletRequest} import scala.annotation.tailrec import scala.util.{Try, Success, Failure} -import scala.util.parsing.json.JSONType import scala.xml.Node +import net.liftweb.json.{JValue, pretty, render} + import org.eclipse.jetty.server.{Server, Request, Handler} import org.eclipse.jetty.server.handler.{ResourceHandler, HandlerList, ContextHandler, AbstractHandler} import org.eclipse.jetty.util.thread.QueuedThreadPool @@ -38,8 +39,8 @@ private[spark] object JettyUtils extends Logging { type Responder[T] = HttpServletRequest => T // Conversions from various types of Responder's to jetty Handlers - implicit def jsonResponderToHandler(responder: Responder[JSONType]): Handler = - createHandler(responder, "text/json", (in: JSONType) => in.toString) + implicit def jsonResponderToHandler(responder: Responder[JValue]): Handler = + createHandler(responder, "text/json", (in: JValue) => pretty(render(in))) implicit def htmlResponderToHandler(responder: Responder[Seq[Node]]): Handler = createHandler(responder, "text/html", (in: Seq[Node]) => "" + in.toString) diff --git a/pom.xml b/pom.xml index de883e2abc..85bcd8696c 100644 --- a/pom.xml +++ b/pom.xml @@ -260,6 +260,11 @@ 10.4.2.0 test + + net.liftweb + lift-json_2.9.2 + 2.5 + com.codahale.metrics metrics-core diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index fbeae27707..5fdcf19b62 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -182,6 +182,7 @@ object SparkBuild extends Build { "com.typesafe.akka" % "akka-slf4j" % "2.0.5" excludeAll(excludeNetty), "it.unimi.dsi" % "fastutil" % "6.4.4", "colt" % "colt" % "1.2.0", + "net.liftweb" % "lift-json_2.9.2" % "2.5", "org.apache.mesos" % "mesos" % "0.12.1", "io.netty" % "netty-all" % "4.0.0.Beta2", "org.apache.derby" % "derby" % "10.4.2.0" % "test", From a77e0abb96c4d4cd35822656e57b99320a16aec2 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 26 Aug 2013 11:21:03 -0700 Subject: [PATCH 136/136] Added worker state to the cluster master JSON ui. --- core/src/main/scala/spark/deploy/JsonProtocol.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/deploy/JsonProtocol.scala b/core/src/main/scala/spark/deploy/JsonProtocol.scala index bd1db7c294..f8dcf025b4 100644 --- a/core/src/main/scala/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/spark/deploy/JsonProtocol.scala @@ -33,7 +33,8 @@ private[spark] object JsonProtocol { ("cores" -> obj.cores) ~ ("coresused" -> obj.coresUsed) ~ ("memory" -> obj.memory) ~ - ("memoryused" -> obj.memoryUsed) + ("memoryused" -> obj.memoryUsed) ~ + ("state" -> obj.state.toString) } def writeApplicationInfo(obj: ApplicationInfo) = {
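
The two patches above move the standalone web UI's JSON endpoints back onto the lift-json DSL and add a per-worker "state" field to the master's output. A minimal, self-contained sketch of how that DSL composes and renders such a record (assuming only lift-json 2.5 on the classpath; the worker values below are placeholders, not a real WorkerInfo, and nothing is fetched from a live master):

    import net.liftweb.json.JsonDSL._
    import net.liftweb.json.JsonParser
    import net.liftweb.json.JsonAST.JString
    import net.liftweb.json.{pretty, render}

    object WorkerJsonSketch {
      def main(args: Array[String]) {
        // Build a JSON object field by field with the ~ operator, in the same
        // style as JsonProtocol.writeWorkerInfo, including the new "state" field.
        val worker =
          ("id" -> "worker-sample-0001") ~
          ("host" -> "example-host") ~
          ("cores" -> 4) ~
          ("memoryused" -> 512) ~
          ("state" -> "ALIVE")

        // render(...) turns the JValue tree into a printable document and
        // pretty(...) formats it, which is how JettyUtils writes JSON responses.
        val rendered = pretty(render(worker))
        println(rendered)

        // A consumer of the master's JSON output can parse the text back and
        // select the newly exposed field with the \ operator.
        val JString(state) = JsonParser.parse(rendered) \ "state"
        println("worker state: " + state)   // prints: worker state: ALIVE
      }
    }

Because the field is purely additive, existing consumers that ignore unknown keys keep working, while monitoring tools can now tell live workers apart from ones the master has marked dead without scraping the HTML pages.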
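
Further back in this series, the PySpark stats() patch merges one StatCounter per partition using the same running-mean/M2 update that statcounter.py ports from spark/util/StatCounter.scala. The following standalone sketch (plain Scala with made-up sample values, not Spark code) spells out that merge rule and compares it against a single pass over the combined data:

    object MergeStatsSketch {
      def main(args: Array[String]) {
        val left  = Seq(1.0, 2.0, 3.0)
        val right = Seq(10.0, 20.0, 30.0, 40.0)

        // Count, mean and sum of squared deviations (M2) for one chunk of data.
        def stats(xs: Seq[Double]) = {
          val n  = xs.size
          val mu = xs.sum / n
          val m2 = xs.map(x => (x - mu) * (x - mu)).sum
          (n, mu, m2)
        }

        val (nL, muL, m2L) = stats(left)
        val (nR, muR, m2R) = stats(right)

        // Merge rule from StatCounter.mergeStats: count-weighted mean, and M2
        // corrected by delta^2 * nL * nR / (nL + nR).
        val n     = nL + nR
        val delta = muR - muL
        val mu    = (muL * nL + muR * nR) / n
        val m2    = m2L + m2R + delta * delta * nL * nR / n
        println("merged   mean / variance: " + mu + " / " + (m2 / n))

        // Single pass over the combined data gives the same result.
        val (_, muAll, m2All) = stats(left ++ right)
        println("one-pass mean / variance: " + muAll + " / " + (m2All / n))
      }
    }

The delta-squared correction term is what lets each partition be summarized independently and still recover the global mean and variance when the partial results are reduced.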