#!/usr/bin/bash
set -ex
# ## Step 1: Provisioning the Servers and Installing Docker
#
# We will assume you are setting up a cluster with one head node and two
# workers. Adding more workers is straightforward. If you only have
# two machines available, you can build just one worker. If you only
# have one machine, it can serve as both the head node and a worker node,
# but the Slurm services might cause measurement noise in your CFiddle
# experiments.
#
# Start off by
#
# 1. Provisioning the three machines with Ubuntu 22.04.
# 2. Making sure you can SSH into them.
#
# Then SSH into the head node and run the following commands to install
# `git`, check out this repo, and install Docker (you'll need to cut and
# paste these):
#
# export DEBIAN_FRONTEND=noninteractive
# apt-get update && apt-get upgrade -y && apt-get install -y git
# git clone https://github.com/NVSL/cfiddle-cluster.git
# ./cfiddle-cluster/install_docker.sh
#
# From here on we'll refer to the IP address of the head node as
# `HEAD_ADDR`, and use `WORKER_ADDRS` to refer to the list of workers.
# Both of these can be set in `config.sh`.
#
# If you get your machines from a cloud provider, they will have two IP
# addresses -- a private IP address on the cloud provider's network and
# a public address that you can SSH into. We are going to use the
# public, _external_ addresses for setting up the cluster.
#
# ## Step 2: Setting up the Head Node
#
# SSH into the head node. Edit `config.sh` to include the IP addresses
# of your machines and your Docker Hub username:
#
# cd cfiddle-cluster
# pico config.sh
#
# Alternatively, you can put the addresses in `cluster_nodes.sh`. Set
# `WORKER_ADDRS` to a space-separated list of IP addresses. For example:
#
# export WORKER_ADDRS="127.0.0.1 127.0.0.2"
#
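# A fuller example (the addresses below are placeholders; substitute your
# machines' public IPs):
#
# ```
# export HEAD_ADDR=203.0.113.10
# export WORKER_ADDRS="203.0.113.11 203.0.113.12"
# ```
#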
# Your cluster is now configured! To bring it up and test it, just run this script:
#
# ./build_cluster.sh
#
# Read on to see what the script is doing; all the code that follows should work as written.
# You may need to approve the SSH host keys for the worker nodes.
#
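# If key-based SSH from the head node to the workers is not already set up,
# something along these lines will do it (a sketch; adjust the user and key
# type to your setup):
#
# ```
# ssh-keygen -t ed25519                          # skip if you already have a key
# for W in $WORKER_ADDRS; do ssh-copy-id root@$W; done
# ```
#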
# Set up your environment:
#
source config.sh
./check_config_sanity.sh
# Verify ssh works.
for W in $WORKER_ADDRS; do
ssh $W true
done
#
# You'll need to do that every time you log in to maintain your cluster.
#
# ## Step 3: Install Docker on the Workers
#
# Copy the Docker install script to each worker and then run it.
for W in $WORKER_ADDRS; do scp install_docker.sh $W:install_docker.sh; ssh $W bash ./install_docker.sh; done
# ## Step 4: Create a Docker Swarm
#
# [Docker Swarm](https://docs.docker.com/engine/swarm/) is a tool for
# orchestrating Docker containers across multiple machines. It's going
# to do all the heavy lifting of starting and tending to the Slurm nodes.
#
# If you aren't familiar with swarm and/or the steps below don't work
# as expected, do the [swarm
# tutorial](https://docs.docker.com/engine/swarm/swarm-tutorial/).
#
# First, on your head node, create the swarm:
#
docker swarm init --advertise-addr $HEAD_ADDR
SWARM_TOKEN=$(docker swarm join-token worker -q)  # capture the worker join token for use below
#
# It'll respond with something like:
#
# ```
# Swarm initialized: current node (dxn1zf6l61qsb1josjja83ngz) is now a manager.
#
# To add a worker to this swarm, run the following command:
#
# docker swarm join --token SWMTKN-1-49nj1cmql0jkz5s954yi3oex3nedyz0fb0xx14ie39trti4wxv-8vxv8rssmk743ojnwacrr2e7c 192.168.99.100:2377
#
# To add a manager to this swarm, run 'docker swarm join-token manager' and follow the instructions.
# ```
#
# The script runs the corresponding `docker swarm join` command on each of the workers using the captured token:
#
for W in $WORKER_ADDRS; do ssh $W "docker swarm join --token $SWARM_TOKEN $HEAD_ADDR:2377";done
#
# And verify that your swarm now has three members:
#
docker node ls
#
# Which should give something like this:
#
# ```
# ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
# 7ji737xit1a0wz4f5vi3adxu7 * cfiddle-cluster-testing Ready Active Leader 24.0.5
# 4v0hj6bslg4baj5fjog1vjqjp cfiddle-cluster-worker-0 Ready Active 24.0.5
# gx2tvady2o4i4gi5ffd0l8bk0 cfiddle-cluster-worker-1 Ready Active 24.0.5
# ```
#
# Next we need to label the nodes so we can constrain where the Slurm
# services run. These commands will do it. Your hostnames might be
# different, but they should match the output of the `docker node ls`
# command above:
#
WORKER_NODE_IDS=$(docker node ls --format '{{.ID}} {{.ManagerStatus}}' | grep -v Leader | cut -f 1 -d ' ')
HEAD_NODE_ID=$(docker node ls --format '{{.ID}} {{.ManagerStatus}}' | grep Leader | cut -f 1 -d ' ')
docker node update --label-add slurm_role=head_node $HEAD_NODE_ID
for W in $WORKER_NODE_IDS; do docker node update --label-add slurm_role=worker $W;done
#
# `docker-compose.yml` contains constraints that will ensure that one
# worker container runs on each worker node.
#
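# For reference, a placement constraint of that sort looks roughly like this
# compose fragment (a sketch; the actual service definitions in
# `docker-compose.yml` may differ):
#
# ```
#   deploy:
#     placement:
#       constraints:
#         - node.labels.slurm_role == worker
# ```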
#
# ## Step 5: Build the Docker Images
#
# Grab CFiddle and delegate-function at the pinned git tags:
#
git clone -b $DELEGATE_FUNCTION_GIT_TAG http://github.com/NVSL/delegate-function
git clone -b $CFIDDLE_GIT_TAG http://github.com/NVSL/cfiddle
#
# Then, we can build the docker images
#
docker compose build --progress=plain
#
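# To confirm the images were built, you can list them (assuming the image
# names follow the `cfiddle-*` pattern that appears in the service listing
# later in this script):
#
# docker image ls --filter 'reference=cfiddle-*'
#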
# ## Step 6: Distribute the Docker Images
#
# We need to share the images we built with the worker nodes. The
# "right" way to do this is with a [local docker
# registry](https://docs.docker.com/registry/deploying/). I couldn't
# get that to work, so we will use [Docker Hub](http://dockerhub.com/)
# instead.
#
# You can then distribute the images to the workers with:
#
./distribute_images.sh
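#
# Roughly speaking, this step amounts to tagging the images with your Docker
# Hub username, pushing them, and pulling them on each worker. A sketch
# (`$DOCKERHUB_USER` and the image names here are illustrative; the actual
# script may differ):
#
# ```
# for IMG in cfiddle-cluster cfiddle-user cfiddle-sandbox; do
#     docker tag $IMG:latest $DOCKERHUB_USER/$IMG:latest
#     docker push $DOCKERHUB_USER/$IMG:latest
#     for W in $WORKER_ADDRS; do ssh $W docker pull $DOCKERHUB_USER/$IMG:latest; done
# done
# ```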
#
# ## Step 7: Create User Accounts
#
# First, we'll create the `cfiddle` account on each worker. It will be
# a privileged account that will be used to spawn the sandbox docker
# container on the worker nodes, so it needs to be in the `docker`
# group.
#
for W in $WORKER_ADDRS; do ssh $W useradd -r -s /usr/sbin/nologin -u 7000 -G docker cfiddle;done
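#
# You can confirm the account exists on each worker with:
#
# for W in $WORKER_ADDRS; do ssh $W id cfiddle; done
#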
# Second, we'll create the Jupyter test user. It is a stand-in for your
# real users. We are going to create it locally, with its home
# directory in `/home`, and then mount that directory via NFS into the
# containers. As mentioned above, you'll probably want a different, more
# permanent/maintainable solution for creating users.
#
# One nice thing about Slurm is that we don't need to create the user
# on the worker nodes. Everything is based on numeric user IDs.
#
./create_jovyan.sh
#
# Then we will populate the jovyan account (which is for testing the Jupyter notebook) by extracting its contents from the user image and copying them into the local home directory:
docker create --name extract_jovyan cfiddle-user # create a data-only container we can copy out of
docker cp extract_jovyan:/home/jovyan /home/
chown -R jovyan /home/jovyan
docker container rm extract_jovyan # cleanup
#
#
# Note: it is probably not necessary, but because the group IDs for the
# docker group don't match across the docker images and the physical
# machines, you may need something like `groupadd --gid 1001 docker_users`
# (and possibly `groupadd cfiddlers`).
# ## Step 8: Set up the Munge Key
#
# Munge is what Slurm uses for user authentication, and it needs a
# private key that is shared across all the members of the
# cluster _and_ the hosts that users submit jobs from.
#
# The key lives in /etc/munge/, and each image we built has a different
# key, which won't work. So, we will extract the key from one of
# them, store it locally on the head node, and mount it via NFS.
mkdir /etc/munge
docker create --name extract_munge cfiddle-cluster:latest # create a data-only container we can copy out of
docker cp extract_munge:/etc/munge/munge.key /etc/munge/munge.key
chown -R $MUNGE_UID:$MUNGE_GID /etc/munge
chmod -R go-rwx /etc/munge
docker container rm extract_munge # cleanup
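#
# You can sanity-check the key's ownership and permissions (it should be
# owned by the munge UID and unreadable by other users):
#
# ls -l /etc/munge/munge.key
#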
#exportfs -ra # Let nfsd know that /etc/munge now exists
# ## Step 9: Set up NFS
#
# For our quick-and-dirty NFS server, we need to load the necessary modules, install a package, and populate `/etc/exports`:
#
modprobe nfs
modprobe nfsd
apt-get install -y nfs-kernel-server
echo '/home *(rw,no_subtree_check) ## cfiddle_cluster' >> /etc/exports
echo '/etc/munge *(rw,no_subtree_check,no_root_squash) ## cfiddle_cluster' >> /etc/exports
exportfs -ra
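#
# A quick way to confirm the exports took effect:
#
# exportfs -v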
#
# This will complain about /etc/munge not existing. We'll fix that shortly.
#
# You can test it with:
#
# ```
mount -t nfs localhost:/home /mnt
ls /mnt/
[ $(ls /mnt | grep jovyan) == "jovyan" ]
# ```
#
# Which should yield:
#
# ```
# jovyan
# ```
#
# Clean up the test mount:
#
umount /mnt
#
# ## Step 10: Bring up the Cluster
#
# To bring up the cluster, we can just do:
#
./start_cluster.sh
#
# which will yield:
#
# ```
# Ignoring unsupported options: build
#
# Creating network slurm-stack_default
# Creating service slurm-stack_c1-srv
# Creating service slurm-stack_c2-srv
# Creating service slurm-stack_mysql-srv
# Creating service slurm-stack_slurmdbd-srv
# Creating service slurm-stack_slurmctld-srv
# Creating service slurm-stack_userhost-srv
# ```
#
# You can check that things are running with:
#
docker service ls
#
# Which should show:
#
# ```
# ID NAME MODE REPLICAS IMAGE PORTS
# 8pza528flemh slurm-stack_c1-srv replicated 1/1 cfiddle-cluster:latest
# kwgfrf3qq6fw slurm-stack_c2-srv replicated 1/1 cfiddle-cluster:latest
# gf5ptcaagz63 slurm-stack_mysql-srv replicated 1/1 mysql:5.7
# k7jq67iu3dcd slurm-stack_sandbox-dummy replicated 0/0 cfiddle-sandbox:latest
# h3c4rgkbcamg slurm-stack_slurmctld-srv replicated 1/1 cfiddle-cluster:latest
# 3un6uhoyl51c slurm-stack_slurmdbd-srv replicated 1/1 cfiddle-cluster:latest
# 1tfmmmq62qe1 slurm-stack_userhost-srv replicated 1/1 cfiddle-user:latest *:8888->8888/tcp
# ```
#
# The `c1` and `c2` services run on our two worker nodes to execute
# Slurm jobs. The next three run on the head node to manage
# the cluster.
#
# The `userhost` container is a stand-in for the machines where users will
# do their work and submit jobs.
#
# Things are running properly if the `REPLICAS` column contains
# `1/1` on each line. If one of them shows `0/1`, you can check the
# status of that service with (e.g., for `slurm-stack_slurmctld-srv`):
#
# docker service ps --no-trunc slurm-stack_slurmctld-srv
#
# and dive deeper with
#
# docker service logs slurm-stack_slurmctld-srv
#
# ## Step 11: Test the Slurm Cluster
#
# Now we can test the operation of the Slurm cluster. To do that we'll
# start a shell in the userhost container, but first we need its name:
#
# ```
docker container ls
# ```
# Yielding:
# ```
# CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
# 9753ad8fc5de cfiddle-cluster:latest "/usr/local/bin/dock…" 7 minutes ago Up 7 minutes slurm-stack_userhost-srv.1.pfzqidlnq7ow74y8eys3tr5vs
# 948cf848d285 cfiddle-cluster:latest "/usr/local/bin/dock…" 7 minutes ago Up 7 minutes slurm-stack_slurmctld-srv.1.hwkx0uibtsohblfxyl5zcybz7
# dd8e12974325 cfiddle-cluster:latest "/usr/local/bin/dock…" 7 minutes ago Up 7 minutes slurm-stack_slurmdbd-srv.1.yz65lbuqf2pmme2ovj2sges8s
# f7b4042bea25 mysql:5.7 "docker-entrypoint.s…" 7 minutes ago Up 7 minutes 3306/tcp, 33060/tcp slurm-stack_mysql-srv.1.4sa09y0co9mwnq1ismae1kgfg
# ```
#
# Your container names will be different, but you want the one with `userhost` in it.
#
# Wait up to 30 seconds for everything to come up, polling `sinfo` once per second.
(c=0
 while [ $c -lt 30 ]; do
     . config.sh                     # refresh $userhost with the current container name
     if docker exec -it $userhost sinfo; then
         exit 0                      # the cluster is up; leave the subshell successfully
     fi
     sleep 1
     c=$((c+1))
     echo .
 done
 exit 1                              # give up; with `set -e` this aborts the script
)
# It should provide information about your cluster:
#
# PARTITION AVAIL TIMELIMIT NODES STATE NODELIST
# normal* up infinite 2 idle c[1-2]
# config.sh extracts these service names and stores them in environment variables, so we source it again:
. config.sh
echo $userhost
echo $slurmctld
echo $slurmdbd
echo $mysql
#
# Then we can submit a job:
docker exec -it $userhost salloc srun bash -c 'echo -ne "hello from "; hostname'
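#
# If both workers came up, you can also ask Slurm to run a command on every
# node at once:
#
# docker exec -it $userhost srun -N 2 hostname
#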
# and that's it!
echo "####################### Building the cluster completed successfully! #######################"
exit 0