# docker/kubernetes/spark-containers.yaml

#spark-master
#kubectl run namenode --image=docker.mimirdb.info/spark-hadoop --replicas=1 --port=22 --port=6066 --port=7077 --port=8020 --port=8080 --port=50070 --env="MASTER=spark://namenode:7077" --env="SPARK_CONF_DIR=/conf" --env="SPARK_PUBLIC_DNS=127.0.0.1" --env="LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" --env="SPARK_EXECUTOR_MEMORY=8g" --env="SPARK_DAEMON_MEMORY=8g" --env="SPARK_DRIVER_MEMORY=8g" --env="SPARK_WORKER_MEMORY=8g" --env="HDFS_CONF_dfs_client_use_datanode_hostname=true" --env="AWS_ECS=false" --command /usr/local/spark-2.2.0-bin-without-hadoop/master.sh
kind: PersistentVolume
apiVersion: v1
metadata:
  name: nn-pv-1
  labels:
    type: namenode
spec:
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  storageClassName: default
  hostPath:
    path: "/mnt/hdfs-data/name"
---
kind: PersistentVolume
apiVersion: v1
metadata:
  name: dn-pv-1
spec:
  capacity:
    storage: 3Gi
  accessModes:
    - ReadWriteOnce
  storageClassName: default
  hostPath:
    path: "/mnt/hdfs-data/1"
---
kind: PersistentVolume
apiVersion: v1
metadata:
  name: dn-pv-2
spec:
  capacity:
    storage: 1Gi
  accessModes:
    - ReadWriteOnce
  storageClassName: default
  hostPath:
    path: "/mnt/hdfs-data/2"
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: hdfs-name
spec:
  selector:
    matchLabels:
      type: namenode
  accessModes:
    - ReadWriteOnce
  storageClassName: default
  resources:
    requests:
      storage: 1Gi
---
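# Optional sanity check (not part of the manifest; assumes kubectl is pointed at the
# target cluster and the file is applied from the repository root): the hostPath
# PersistentVolumes above should bind to the hdfs-name claim here and to the datanode
# volumeClaimTemplates further down.
#   kubectl apply -f docker/kubernetes/spark-containers.yaml
#   kubectl get pv,pvc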
kind: Service
apiVersion: v1
metadata:
  creationTimestamp: null
  labels:
    app: namenode
  name: namenode
  namespace: default
spec:
  ports:
    - name: ssh
      port: 22
    - name: hadoop
      port: 6066
    - name: spark
      port: 7077
    - name: hdfs
      port: 8020
    - name: sparkui
      port: 8080
    - name: hadoopui
      port: 50070
  clusterIP: None
  selector:
    app: namenode
---
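# The namenode Service above is headless (clusterIP: None), so the StatefulSet pod
# below gets the stable DNS name namenode-0.namenode.default.svc.cluster.local that
# the MASTER, HDFS_HOST, and CORE_CONF_fs_defaultFS values rely on. A rough way to
# confirm resolution from inside the cluster (assumes nslookup exists in the image):
#   kubectl exec -it namenode-0 -- nslookup namenode-0.namenode.default.svc.cluster.local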
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: namenode
spec:
  serviceName: "namenode"
  replicas: 1
  template:
    metadata:
      labels:
        app: namenode
    spec:
      volumes:
        - name: hdfs-name
          persistentVolumeClaim:
            claimName: hdfs-name
      hostNetwork: true
      hostPID: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - name: namenode
          image: docker.mimirdb.info/spark-hadoop
          command: ["/usr/local/spark-2.4.0-bin-without-hadoop/master.sh"]
          ports:
            - containerPort: 22
            - containerPort: 6066
            - containerPort: 7077
            - containerPort: 8020
            - containerPort: 8080
            - containerPort: 50070
          resources:
            requests:
              cpu: 100m
          env:
            - name: MASTER
              value: "spark://namenode-0.namenode.default.svc.cluster.local:7077"
            - name: MASTER_IP
              value: "127.0.0.1"
            - name: HDFS_HOST
              value: "namenode-0.namenode.default.svc.cluster.local"
            - name: CORE_CONF_fs_defaultFS
              value: "hdfs://namenode-0.namenode.default.svc.cluster.local:8020"
            - name: SPARK_CONF_DIR
              value: "/conf"
            - name: SPARK_PUBLIC_DNS
              value: "namenode-0.namenode.default.svc.cluster.local"
            - name: LD_LIBRARY_PATH
              value: "/usr/local/hadoop/lib/native/"
            - name: SPARK_EXECUTOR_MEMORY
              value: "8g"
            - name: SPARK_DAEMON_MEMORY
              value: "8g"
            - name: SPARK_DRIVER_MEMORY
              value: "8g"
            - name: SPARK_WORKER_MEMORY
              value: "8g"
            - name: HDFS_CONF_dfs_client_use_datanode_hostname
              value: "true"
            - name: HDFS_CONF_dfs_datanode_use_datanode_hostname
              value: "true"
            - name: AWS_ECS
              value: "true"
          volumeMounts:
            - name: hdfs-name
              mountPath: /hadoop/dfs/name
---
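# Once namenode-0 is Running, a quick look at the Spark master is possible with plain
# kubectl (8080 matches the containerPort and Service port above; because the pod uses
# hostNetwork: true the UI may also be reachable directly on the node's address):
#   kubectl logs namenode-0
#   kubectl port-forward namenode-0 8080:8080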
#spark-worker
#kubectl run $HOSTNAME --image=docker.mimirdb.info/spark-hadoop --replicas=2 --port=$WORKER_PORT --port=$DATANODE_PORT --env="SPARK_CONF_DIR=/conf" --env="SPARK_PUBLIC_DNS=127.0.0.1" --env="SPARK_WORKER_CORES=4" --env="SPARK_WORKER_PORT=$WORKER_PORT" --env="SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" --env="LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" --env="HDFS_DATA_HOST=$HOSTNAME" --env="HDFS_HOST=spark-master" --env="HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" --env="SPARK_EXECUTOR_MEMORY=8g" --env="SPARK_DAEMON_MEMORY=8g" --env="SPARK_DRIVER_MEMORY=8g" --env="SPARK_WORKER_MEMORY=8g" --env="HDFS_CONF_dfs_client_use_datanode_hostname=true" --env="AWS_ECS=false" --command /usr/local/spark-2.2.0-bin-without-hadoop/worker.sh
kind: Service
apiVersion: v1
metadata:
  creationTimestamp: null
  labels:
    app: datanode
  name: datanode
  namespace: default
spec:
  ports:
    - name: hdfsdata1
      port: 7022
    - name: hdfsdata2
      port: 7023
    - name: hdfsdata3
      port: 7024
    - name: hdfsdata4
      port: 7025
    - name: hdfsdata5
      port: 7026
    - name: hdfsa
      port: 8882
    - name: hdfsa2
      port: 8082
    - name: hdfsp
      port: 50010
    - name: hdfsweb
      port: 50075
  selector:
    app: datanode
  clusterIP: None
---
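# Like the namenode Service, the datanode Service is headless, so each worker pod is
# addressable as datanode-N.datanode.default.svc.cluster.local; the
# dfs_client_use_datanode_hostname / dfs_datanode_use_datanode_hostname settings below
# make HDFS traffic use those hostnames rather than pod IPs. To see which nodes the
# workers landed on:
#   kubectl get pods -l app=datanode -o wide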
kind: StatefulSet
apiVersion: apps/v1beta1
metadata:
  creationTimestamp: null
  labels:
    app: datanode
  name: datanode
  namespace: default
spec:
  serviceName: "datanode"
  replicas: 2
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: datanode
    spec:
      hostNetwork: true
      hostPID: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - name: datanode
          image: docker.mimirdb.info/spark-hadoop
          command: ["/usr/local/spark-2.4.0-bin-without-hadoop/worker.sh"]
          ports:
            - containerPort: 7022
            - containerPort: 7023
            - containerPort: 7024
            - containerPort: 7025
            - containerPort: 7026
            - containerPort: 8882
            - containerPort: 8082
            - containerPort: 50010
            - containerPort: 50075
          resources:
            requests:
              cpu: 100m
          env:
            - name: SPARK_CONF_DIR
              value: "/conf"
            - name: SPARK_WORKER_CORES
              value: "4"
            - name: SPARK_WORKER_PORT
              value: "8882"
            - name: SPARK_WORKER_WEBUI_PORT
              value: "8082"
            - name: LD_LIBRARY_PATH
              value: "/usr/local/hadoop/lib/native/"
            - name: HDFS_HOST
              value: "namenode-0.namenode.default.svc.cluster.local"
            - name: CORE_CONF_fs_defaultFS
              value: "hdfs://namenode-0.namenode.default.svc.cluster.local:8020"
            - name: HDFS_CONF_dfs_datanode_address
              value: "0.0.0.0:50010"
            - name: SPARK_EXECUTOR_MEMORY
              value: "8g"
            - name: SPARK_DAEMON_MEMORY
              value: "8g"
            - name: SPARK_DRIVER_MEMORY
              value: "8g"
            - name: SPARK_WORKER_MEMORY
              value: "8g"
            - name: HDFS_CONF_dfs_client_use_datanode_hostname
              value: "true"
            - name: HDFS_CONF_dfs_datanode_use_datanode_hostname
              value: "true"
            - name: AWS_ECS
              value: "true"
          volumeMounts:
            - name: hdfs-data
              mountPath: /hadoop/dfs/data
  volumeClaimTemplates:
    - metadata:
        name: hdfs-data
      spec:
        storageClassName: default
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 1Gi
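# Scaling note (a sketch, not verified against this cluster): extra workers can be
# added with the command below, but each new replica needs a matching PersistentVolume,
# since the hostPath PVs above are created by hand rather than dynamically provisioned.
#   kubectl scale statefulset datanode --replicas=3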