# config.yaml
apiVersion: run.ai/v1
kind: RunaiJob
metadata:
  name: yyan # Name your pod; start with your initials, e.g. ep-pod-name. Must consist of lower-case alphanumeric characters, '-' or '.', and must start and end with an alphanumeric character
  namespace: runai-ivrl-yyan # Run `runai list projects` to see which project you belong to, then put its namespace here. It looks like runai-ivrl-{your Gaspar username}
  labels:
    user: yyan # Your username
spec:
  template:
    metadata:
      labels:
        user: yuhang.yan # Your user, e.g. firstname.lastname
    spec:
      hostIPC: true
      schedulerName: runai-scheduler
      restartPolicy: Never
      nodeSelector:
        run.ai/type: G10 # "S8" (CPU only), "G9" (Nvidia V100) or "G10" (Nvidia A100)
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: runai-ivrl-yyan-ivrldata2 # Put your Gaspar username here
        - name: ivrl-scratch
          persistentVolumeClaim:
            claimName: runai-ivrl-yyan-scratch # Put your Gaspar username here
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 4Gi # 4Gi of shared memory, backed by RAM
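      # A quick sanity check once the pod is running (a sketch, assuming your
      # kubectl context points at the cluster; the pod name may carry a suffix,
      # check `kubectl get pods`): the emptyDir above should appear as a 4Gi
      # tmpfs inside the container.
      #   kubectl exec -it yyan -n runai-ivrl-yyan -- df -h /dev/shm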
      containers:
        - name: ubuntu # Name your container (the name is arbitrary)
          image: ic-registry.epfl.ch/ivrl/pajouheshgar/pytorch2.01:cuda11.7v2 # The Docker image you want to use; it must be pushed to ic-registry
          env:
            - name: CLUSTER_USER
              value: "yyan" # Your EPFL username, inside double quotes
            - name: CLUSTER_USER_ID
              value: "286103" # Your EPFL UID, inside double quotes
            - name: CLUSTER_GROUP_NAME
              value: "ivrl" # Your group name, inside double quotes
            - name: CLUSTER_GROUP_ID
              value: "30034" # Your EPFL GID, inside double quotes
          workingDir: /
          command: [ "/bin/bash", "-c" ] # Replace yyan with your username in args below
          args: [ "source /opt/lab/setup.sh && su yyan -c 'jupyter lab --ip=0.0.0.0 --no-browser --notebook-dir=/scratch'" ]
          ports:
            - containerPort: 8888
              name: jupyter
          imagePullPolicy: Always
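          # To reach Jupyter, forward the container port to your machine (a
          # sketch, assuming kubectl access; the pod and namespace names match
          # the metadata above, but verify the pod name with `kubectl get pods`):
          #   kubectl port-forward yyan 8888:8888 -n runai-ivrl-yyan
          # The login token is printed in the pod logs:
          #   kubectl logs yyan -n runai-ivrl-yyan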
          resources:
            requests:
              cpu: 16
              memory: "64Gi"
              nvidia.com/gpu: 2
            limits:
              nvidia.com/gpu: 2 # Number of GPUs; keep it equal to the request above
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - mountPath: /scratch
              name: ivrl-scratch
            - mountPath: /data
              name: data
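# A minimal submission workflow (a sketch, assuming kubectl is configured for
# the cluster; the RunAI scheduler picks the job up from the RunaiJob object):
#   kubectl apply -f config.yaml          # submit the job
#   kubectl get pods -n runai-ivrl-yyan   # watch the pod come up
#   kubectl delete -f config.yaml         # tear the job down when done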