# NOTE(review): known issue — the mimir hostname is not resolved from the Spark
# workers; switching mimir to a StatefulSet (stable per-pod DNS) may resolve it.
#spark-master
# Original imperative equivalent, kept for reference:
#kubectl run namenode --image=docker.mimirdb.info/spark-hadoop --replicas=1 --port=22 --port=6066 --port=7077 --port=8020 --port=8080 --port=50070 --env="MASTER=spark://namenode:7077" --env="SPARK_CONF_DIR=/conf" --env="SPARK_PUBLIC_DNS=127.0.0.1" --env="LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" --env="SPARK_EXECUTOR_MEMORY=8g" --env="SPARK_DAEMON_MEMORY=8g" --env="SPARK_DRIVER_MEMORY=8g" --env="SPARK_WORKER_MEMORY=8g" --env="HDFS_CONF_dfs_client_use_datanode_hostname=true" --env="AWS_ECS=false" --command /usr/local/spark-2.2.0-bin-without-hadoop/master.sh

# Host-path volume backing the HDFS namenode metadata directory.
# Labeled `type: namenode` so the hdfs-name PVC below can select it.
kind: PersistentVolume
apiVersion: v1
metadata:
  name: nn-pv-1
  labels:
    type: namenode
spec:
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  storageClassName: default
  hostPath:
    path: "/mnt/hdfs-data/name"
---
# Host-path volume for the first HDFS datanode replica (bound through the
# `default` storage class by the datanode volumeClaimTemplates below).
kind: PersistentVolume
apiVersion: v1
metadata:
  name: dn-pv-1
spec:
  capacity:
    storage: 3Gi
  accessModes:
    - ReadWriteOnce
  storageClassName: default
  hostPath:
    path: "/mnt/hdfs-data/1"
---
# Host-path volume for the second HDFS datanode replica.
kind: PersistentVolume
apiVersion: v1
metadata:
  name: dn-pv-2
spec:
  capacity:
    storage: 1Gi
  accessModes:
    - ReadWriteOnce
  storageClassName: default
  hostPath:
    path: "/mnt/hdfs-data/2"
---
# Claim for the namenode metadata volume; the selector pins it to the
# `type: namenode` PersistentVolume (nn-pv-1) defined above.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: hdfs-name
spec:
  selector:
    matchLabels:
      type: namenode
  accessModes:
    - ReadWriteOnce
  storageClassName: default
  resources:
    requests:
      storage: 1Gi
---
# Headless Service (clusterIP: None) for the namenode StatefulSet: gives the
# pod a stable DNS name (namenode-0.namenode.default.svc.cluster.local) that
# the env vars in both StatefulSets below rely on.
kind: Service
apiVersion: v1
metadata:
  labels:
    app: namenode
  name: namenode
  namespace: default
spec:
  ports:
    - name: ssh
      port: 22
    - name: hadoop
      port: 6066
    - name: spark
      port: 7077
    - name: hdfs
      port: 8020
    - name: sparkui
      port: 8080
    - name: hadoopui
      port: 50070
  clusterIP: None
  selector:
    app: namenode
---
# Spark master + HDFS namenode. StatefulSet (not Deployment) so the pod gets a
# stable network identity — see the hostname-resolution note at the top.
# apps/v1beta1 was removed in Kubernetes 1.16; apps/v1 requires an explicit
# spec.selector, added here to match the template labels (no behavior change).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: namenode
spec:
  serviceName: "namenode"
  replicas: 1
  selector:
    matchLabels:
      app: namenode
  template:
    metadata:
      labels:
        app: namenode
    spec:
      volumes:
        - name: hdfs-name
          persistentVolumeClaim:
            claimName: hdfs-name
      # Host networking + host PID with cluster-first DNS so HDFS/Spark ports
      # are reachable on the node while in-cluster names still resolve.
      hostNetwork: true
      hostPID: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - name: namenode
          image: docker.mimirdb.info/spark-hadoop
          command: ["/usr/local/spark-2.4.0-bin-without-hadoop/master.sh"]
          ports:
            - containerPort: 22
            - containerPort: 6066
            - containerPort: 7077
            - containerPort: 8020
            - containerPort: 8080
            - containerPort: 50070
          resources:
            requests:
              cpu: 100m
          env:
            # Stable StatefulSet DNS name: <pod>.<service>.<ns>.svc.cluster.local
            - name: MASTER
              value: "spark://namenode-0.namenode.default.svc.cluster.local:7077"
            - name: MASTER_IP
              value: "127.0.0.1"
            - name: HDFS_HOST
              value: "namenode-0.namenode.default.svc.cluster.local"
            - name: CORE_CONF_fs_defaultFS
              value: "hdfs://namenode-0.namenode.default.svc.cluster.local:8020"
            - name: SPARK_CONF_DIR
              value: "/conf"
            - name: SPARK_PUBLIC_DNS
              value: "namenode-0.namenode.default.svc.cluster.local"
            - name: LD_LIBRARY_PATH
              value: "/usr/local/hadoop/lib/native/"
            - name: SPARK_EXECUTOR_MEMORY
              value: "8g"
            - name: SPARK_DAEMON_MEMORY
              value: "8g"
            - name: SPARK_DRIVER_MEMORY
              value: "8g"
            - name: SPARK_WORKER_MEMORY
              value: "8g"
            # Hostname-based (not IP-based) HDFS addressing, quoted so the
            # consumer receives the string "true", not a YAML boolean.
            - name: HDFS_CONF_dfs_client_use_datanode_hostname
              value: "true"
            - name: HDFS_CONF_dfs_datanode_use_datanode_hostname
              value: "true"
            - name: AWS_ECS
              value: "true"
          volumeMounts:
            - name: hdfs-name
              mountPath: /hadoop/dfs/name
---
#spark-worker
# Original imperative equivalent, kept for reference:
#kubectl run $HOSTNAME --image=docker.mimirdb.info/spark-hadoop --replicas=2 --port=$WORKER_PORT --port=$DATANODE_PORT --env="SPARK_CONF_DIR=/conf" --env="SPARK_PUBLIC_DNS=127.0.0.1" --env="SPARK_WORKER_CORES=4" --env="SPARK_WORKER_PORT=$WORKER_PORT" --env="SPARK_WORKER_WEBUI_PORT=$WORKER_WEBUI_PORT" --env="LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/" --env="HDFS_DATA_HOST=$HOSTNAME" --env="HDFS_HOST=spark-master" --env="HDFS_CONF_dfs_datanode_address=0.0.0.0:$DATANODE_PORT" --env="SPARK_EXECUTOR_MEMORY=8g" --env="SPARK_DAEMON_MEMORY=8g" --env="SPARK_DRIVER_MEMORY=8g" --env="SPARK_WORKER_MEMORY=8g" --env="HDFS_CONF_dfs_client_use_datanode_hostname=true" --env="AWS_ECS=false" --command /usr/local/spark-2.2.0-bin-without-hadoop/worker.sh

# Headless Service for the datanode StatefulSet: stable per-pod DNS names for
# the Spark workers / HDFS datanodes.
kind: Service
apiVersion: v1
metadata:
  labels:
    app: datanode
  name: datanode
  namespace: default
spec:
  ports:
    - name: hdfsdata1
      port: 7022
    - name: hdfsdata2
      port: 7023
    - name: hdfsdata3
      port: 7024
    - name: hdfsdata4
      port: 7025
    - name: hdfsdata5
      port: 7026
    - name: hdfsa
      port: 8882
    - name: hdfsa2
      port: 8082
    - name: hdfsp
      port: 50010
    - name: hdfsweb
      port: 50075
  selector:
    app: datanode
  clusterIP: None
---
# Spark workers + HDFS datanodes (2 replicas). Per-replica storage comes from
# volumeClaimTemplates, so each pod gets its own hdfs-data claim.
# apps/v1beta1 was removed in Kubernetes 1.16; apps/v1 requires an explicit
# spec.selector, added here to match the template labels (no behavior change).
kind: StatefulSet
apiVersion: apps/v1
metadata:
  labels:
    app: datanode
  name: datanode
  namespace: default
spec:
  serviceName: "datanode"
  replicas: 2
  selector:
    matchLabels:
      app: datanode
  template:
    metadata:
      labels:
        app: datanode
    spec:
      # Host networking + host PID with cluster-first DNS, mirroring the
      # namenode pod so datanode hostnames are reachable from the node.
      hostNetwork: true
      hostPID: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - name: datanode
          image: docker.mimirdb.info/spark-hadoop
          command: ["/usr/local/spark-2.4.0-bin-without-hadoop/worker.sh"]
          ports:
            - containerPort: 7022
            - containerPort: 7023
            - containerPort: 7024
            - containerPort: 7025
            - containerPort: 7026
            - containerPort: 8882
            - containerPort: 8082
            - containerPort: 50010
            - containerPort: 50075
          resources:
            requests:
              cpu: 100m
          env:
            - name: SPARK_CONF_DIR
              value: "/conf"
            - name: SPARK_WORKER_CORES
              value: "4"
            - name: SPARK_WORKER_PORT
              value: "8882"
            - name: SPARK_WORKER_WEBUI_PORT
              value: "8082"
            - name: LD_LIBRARY_PATH
              value: "/usr/local/hadoop/lib/native/"
            # Points back at the namenode's stable StatefulSet DNS name.
            - name: HDFS_HOST
              value: "namenode-0.namenode.default.svc.cluster.local"
            - name: CORE_CONF_fs_defaultFS
              value: "hdfs://namenode-0.namenode.default.svc.cluster.local:8020"
            - name: HDFS_CONF_dfs_datanode_address
              value: "0.0.0.0:50010"
            - name: SPARK_EXECUTOR_MEMORY
              value: "8g"
            - name: SPARK_DAEMON_MEMORY
              value: "8g"
            - name: SPARK_DRIVER_MEMORY
              value: "8g"
            - name: SPARK_WORKER_MEMORY
              value: "8g"
            # Hostname-based HDFS addressing; quoted so the consumer receives
            # the string "true", not a YAML boolean.
            - name: HDFS_CONF_dfs_client_use_datanode_hostname
              value: "true"
            - name: HDFS_CONF_dfs_datanode_use_datanode_hostname
              value: "true"
            - name: AWS_ECS
              value: "true"
          volumeMounts:
            - name: hdfs-data
              mountPath: /hadoop/dfs/data
  # One ReadWriteOnce claim per replica, satisfied by dn-pv-1 / dn-pv-2.
  volumeClaimTemplates:
    - metadata:
        name: hdfs-data
      spec:
        storageClassName: default
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 1Gi