FROM docker.mimirdb.info/alpine_oraclejdk8

# Metadata
LABEL base.image="docker.mimirdb.info/alpine_oraclejdk8"
LABEL version="0.1"
LABEL software="Spark"
LABEL software.version="0.1.201801"
LABEL description="Spark image"

RUN apk add --update curl bash sed perl grep openssh #dnsmasq drill dhclient

#download hadoop
#ARG HADOOP_ARCHIVE=http://www.eu.apache.org/dist/hadoop/common/hadoop-2.7.6/hadoop-2.7.6.tar.gz
#RUN curl -sL $HADOOP_ARCHIVE | gunzip | tar -x -C /usr/local/
#or copy it
COPY hadoop-2.8.2.tar.gz /
RUN gunzip -c /hadoop-2.8.2.tar.gz | tar -x -C /usr/local/ && rm /hadoop-2.8.2.tar.gz
RUN cd /usr/local && ln -s ./hadoop-2.8.2 hadoop

#environment variables
ENV HADOOP_PREFIX /usr/local/hadoop
ENV HADOOP_COMMON_HOME /usr/local/hadoop
ENV HADOOP_HDFS_HOME /usr/local/hadoop
ENV HADOOP_MAPRED_HOME /usr/local/hadoop
ENV HADOOP_YARN_HOME /usr/local/hadoop
ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop
ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop
ENV HADOOP_HOME=/usr/local/hadoop
ENV CLUSTER_NAME=test
ENV MASTER_IP=0
ENV AWS_ECS=true
ENV HDFS_HOST=namenode
ENV HDFS_DATA_HOST=datanode
ENV CORE_CONF_fs_defaultFS=hdfs://namenode:8020
ENV CORE_CONF_hadoop_http_staticuser_user=root
ENV CORE_CONF_hadoop_proxyuser_hue_hosts=*
ENV CORE_CONF_hadoop_proxyuser_hue_groups=*
ENV HDFS_CONF_dfs_webhdfs_enabled=true
ENV HDFS_CONF_dfs_permissions_enabled=false
ENV HDFS_CONF_dfs_client_use_datanode_hostname=true
ENV HDFS_CONF_dfs_datanode_use_datanode_hostname=true
ENV HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
ENV HDFS_CONF_dfs_datanode_address=0.0.0.0:50010

#env for namenode
ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
RUN mkdir -p /hadoop/dfs/name
VOLUME /hadoop/dfs/name

#env for datanode
ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
RUN mkdir -p /hadoop/dfs/data
VOLUME /hadoop/dfs/data
#VOLUME ["type=volume,source=mimir-vol,target=\/usr\/local\/source\/"]

#download spark
#ARG SPARK_ARCHIVE=http://supergsego.com/apache/spark/spark-2.4.0/spark-2.4.0-bin-hadoop2.7.tgz
#RUN curl -sL $SPARK_ARCHIVE | gunzip | tar -x -C /usr/local/
#or copy it
COPY spark-2.4.0-bin-without-hadoop.tgz /
RUN gunzip -c /spark-2.4.0-bin-without-hadoop.tgz | tar -x -C /usr/local/ && rm /spark-2.4.0-bin-without-hadoop.tgz

#hadoop-aws and the AWS SDK jars for S3A support
COPY hadoop-aws-2.8.2.jar aws-java-sdk-1.11.234.jar aws-java-sdk-core-1.11.234.jar aws-java-sdk-kms-1.11.234.jar \
     aws-java-sdk-s3-1.11.234.jar httpclient-4.5.3.jar joda-time-2.9.9.jar /usr/local/spark-2.4.0-bin-without-hadoop/jars/

ENV SPARK_HOME /usr/local/spark-2.4.0-bin-without-hadoop
ENV PATH $PATH:$SPARK_HOME/bin
ENV INSTANCE_TYPE worker

# for high availability, e.g. ZooKeeper-based leader election
# COPY ha.conf $SPARK_HOME/conf
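# The hadoop-aws and AWS SDK jars copied above enable Spark's S3A filesystem; the
# spark-defaults.conf files copied below are the natural place for the related
# settings. A minimal sketch of such a configuration, using standard Hadoop/Spark
# property names (the endpoint value is an assumption, and credentials are left to
# the default provider chain):
#
#   spark.hadoop.fs.s3a.impl                      org.apache.hadoop.fs.s3a.S3AFileSystem
#   spark.hadoop.fs.s3a.endpoint                  s3.amazonaws.com
#   spark.hadoop.fs.s3a.aws.credentials.provider  com.amazonaws.auth.DefaultAWSCredentialsProviderChain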
echo "prepend domain-name-servers 127.0.0.1" >> /etc/dhcp/dhclient.conf && \ # echo "nameserver 127.0.0.1" > /etc/resolv.conf COPY base_entry.sh $SPARK_HOME/ COPY conf/master/spark-defaults.conf $SPARK_HOME/spark-master-defaults.conf COPY conf/worker/spark-defaults.conf $SPARK_HOME/spark-worker-defaults.conf EXPOSE 4040 6066 7001 7005 7015 7077 8080 8020 40000-50000 50070 22 #53 53/udp #setup ssh RUN echo 'root:odinlab' |chpasswd RUN sed -ri 's/^#?PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config \ && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config \ && ssh-keygen -f /etc/ssh/ssh_host_rsa_key -N '' -t rsa \ && ssh-keygen -f /etc/ssh/ssh_host_dsa_key -N '' -t dsa \ && ssh-keygen -f /etc/ssh/ssh_host_ecdsa_key -N '' -t ecdsa \ && ssh-keygen -f /etc/ssh/ssh_host_ed25519_key -N '' -t ed25519 RUN chmod u+x $SPARK_HOME/base_entry.sh && \ echo "#!/bin/sh" > $SPARK_HOME/master.sh && \ echo "$SPARK_HOME/base_entry.sh \$HDFS_HOST" >> $SPARK_HOME/master.sh && \ echo "cp $SPARK_HOME/spark-master-defaults.conf $SPARK_HOME/conf/spark-defaults.conf" >> $SPARK_HOME/master.sh && \ echo "if [ -f $SPARK_HOME/.initComplete ] || [ -f /hadoop/dfs/name/current/fsimage_0000000000000000000 ]" >> $SPARK_HOME/master.sh && \ echo "then" >> $SPARK_HOME/master.sh && \ echo " echo 'already initialized...'" >> $SPARK_HOME/master.sh && \ echo "else" >> $SPARK_HOME/master.sh && \ echo " $HADOOP_HOME/bin/hdfs --config $HADOOP_HOME/etc/hadoop/ namenode -format $CLUSTER_NAME -force" >> $SPARK_HOME/master.sh && \ echo " touch $SPARK_HOME/.initComplete" >> $SPARK_HOME/master.sh && \ echo "fi " >> $SPARK_HOME/master.sh && \ echo "echo 'hdfs configured'" >> $SPARK_HOME/master.sh && \ echo "export SPARK_DIST_CLASSPATH=\$($HADOOP_HOME/bin/hadoop classpath)" >> $SPARK_HOME/master.sh && \ echo "(nohup $HADOOP_HOME/bin/hdfs --config $HADOOP_HOME/etc/hadoop/ namenode) > hdfs.log 2>&1 &" >> $SPARK_HOME/master.sh && \ #echo "sleep 5" >> $SPARK_HOME/master.sh && \ echo "(sleep 5 && nohup bin/spark-class org.apache.spark.deploy.master.Master -h \$HDFS_HOST) > spark.log 2>&1 &" >> $SPARK_HOME/master.sh && \ #echo "sleep 30" >> $SPARK_HOME/master.sh && \ #echo 'while true; do echo "$(cat spark.log)"; sleep 10; done;' >> $SPARK_HOME/master.sh && \ echo "/usr/sbin/sshd &" >> $SPARK_HOME/master.sh && \ echo 'while true; do uptime; sleep 300; done;' >> $SPARK_HOME/master.sh && \ chmod u+x $SPARK_HOME/master.sh && \ echo "#!/bin/sh" > $SPARK_HOME/worker.sh && \ echo "./base_entry.sh \$HDFS_HOST" >> $SPARK_HOME/worker.sh && \ echo "cp $SPARK_HOME/spark-worker-defaults.conf $SPARK_HOME/conf/spark-defaults.conf" >> $SPARK_HOME/worker.sh && \ echo "export SPARK_DIST_CLASSPATH=\$($HADOOP_HOME/bin/hadoop classpath)" >> $SPARK_HOME/worker.sh && \ echo "(nohup $HADOOP_HOME/bin/hdfs --config $HADOOP_HOME/etc/hadoop/ datanode) > hdfs.log 2>&1 &" >> $SPARK_HOME/worker.sh && \ #echo "sleep 5" >> $SPARK_HOME/worker.sh && \ echo "(sleep 5 && nohup bin/spark-class org.apache.spark.deploy.worker.Worker spark://\$HDFS_HOST:7077) > spark.log 2>&1 &" >> $SPARK_HOME/worker.sh && \ echo "/usr/sbin/sshd &" >> $SPARK_HOME/worker.sh && \ echo 'while true; do uptime; sleep 300; done;' >> $SPARK_HOME/worker.sh && \ chmod u+x $SPARK_HOME/worker.sh WORKDIR $SPARK_HOME