-
Notifications
You must be signed in to change notification settings - Fork 117
169 lines (163 loc) · 5.36 KB
/
SHiELD_parallelworks_intel.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
name: Compile SHiELD SOLO and run tests
# This GitHub Action Workflow is running on the cloud devcimultiintel cluster
# The tests are run inside of a container with the following software/libraries:
# -intel: 2023.2.0
# -hdf5: 1.14.0
# -netcdf-c: 4.9.2
# -netcdf-fortran: 4.6.0
# -cmake
# -libyaml
on:
pull_request:
branches:
- main
# run weekly on Sunday
schedule:
- cron: '0 0 * * 0'
#this should cancel in progress ci runs for the same PR
#(e.g. a second commit on the same PR comes in while CI is still running)
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
checkout:
if: github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
runs-on: [gfdlacsciintel]
name: Checkout Code
steps:
# It can take a long time (5-15 minutes) to spinup nodes
# so this salloc will prompt 46 nodes to startup and stay active for 20 min
# this is enough nodes for the first 17 tests to run in parallel, and we
# have 17 runners configured.
- run: salloc --partition=compute -N 46 -J $GITHUB_SHA sleep 20m &
- run: /contrib/fv3/GFDL_atmos_cubed_sphere_CI/checkout.sh -b $GITHUB_REF -h $GITHUB_SHA
build:
if: github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
runs-on: [gfdlacsciintel]
name: SOLO SHiELD build
needs: [checkout]
strategy:
fail-fast: true
max-parallel: 17
matrix:
runscript: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/compile.sh]
config: [solo]
hydro: [sw, nh, hydro]
bit: [64bit]
mode: [repro]
steps:
- env:
RUNSCRIPT: ${{ matrix.runscript }}
CONFIG: ${{ matrix.config }}
HYDRO: ${{ matrix.hydro }}
BIT: ${{ matrix.bit }}
MODE: ${{ matrix.mode }}
run: $RUNSCRIPT -b $GITHUB_REF -h $GITHUB_SHA -c $CONFIG --hydro $HYDRO --bit $BIT -m $MODE
test:
if: github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
runs-on: [gfdlacsciintel]
name: SOLO SHiELD test suite
needs: [checkout, build]
strategy:
fail-fast: false
max-parallel: 17
matrix:
runscript: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/run_test.sh]
argument:
# These are placed in order of largest to smallest jobs
#layout 8,8 needs 8 nodes on dvcimultiintel cluster
- C512r20.solo.superC
- C768.sw.BTwave
#layout 4,8 needs 4 nodes on dvcimultiintel cluster
- C256r20.solo.superC
- C384.sw.BLvortex
#layout 4,4 needs 2 nodes on dvcimultiintel cluster
- C128r20.solo.superC
- C128r3.solo.TC.d1
- C128r3.solo.TC.h6
- C128r3.solo.TC
- C128r3.solo.TC.tr8
- C192.sw.BLvortex
- C192.sw.BTwave
- C192.sw.modon
- C384.sw.BTwave
#layout 4,1 and 2,2 need 1 node on dvcimultiintel cluster
- C96.solo.BCdry.hyd
- C96.solo.BCdry
- C96.solo.BCmoist.hyd.d3
- C96.solo.BCmoist.hyd
- C96.solo.BCmoist.nhK
- C96.solo.BCmoist
- C96.solo.mtn_rest.hyd.diff2
- C96.solo.mtn_rest.hyd
- C96.solo.mtn_rest.nonmono.diff2
- C96.solo.mtn_rest
- C96.sw.BLvortex
- C96.sw.BTwave
- C96.sw.modon
- C96.sw.RHwave
- d96_1k.solo.mtn_rest_shear.olddamp
- d96_1k.solo.mtn_rest_shear
- d96_1k.solo.mtn_schar.mono
- d96_1k.solo.mtn_schar
- d96_2k.solo.bubble.n0
- d96_2k.solo.bubble.nhK
- d96_2k.solo.bubble
- d96_500m.solo.mtn_schar
steps:
# This will end the slurm job started in the checkout job
- run: scancel -n $GITHUB_SHA
- env:
RUNSCRIPT: ${{ matrix.runscript }}
ARG1: ${{ matrix.argument }}
run: $RUNSCRIPT -t $ARG1 -b $GITHUB_REF -h $GITHUB_SHA
shutdown:
if: always() && github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
runs-on: [gfdlacsciintel]
name: Shutdown Processes
needs: [checkout, build, test]
strategy:
fail-fast: false
max-parallel: 17
matrix:
test:
- C512r20.solo.superC
- C768.sw.BTwave
- C256r20.solo.superC
- C384.sw.BLvortex
- C128r20.solo.superC
- C128r3.solo.TC.d1
- C128r3.solo.TC.h6
- C128r3.solo.TC
- C128r3.solo.TC.tr8
- C192.sw.BLvortex
- C192.sw.BTwave
- C192.sw.modon
- C384.sw.BTwave
- C96.solo.BCdry.hyd
- C96.solo.BCdry
- C96.solo.BCmoist.hyd.d3
- C96.solo.BCmoist.hyd
- C96.solo.BCmoist.nhK
- C96.solo.BCmoist
- C96.solo.mtn_rest.hyd.diff2
- C96.solo.mtn_rest.hyd
- C96.solo.mtn_rest.nonmono.diff2
- C96.solo.mtn_rest
- C96.sw.BLvortex
- C96.sw.BTwave
- C96.sw.modon
- C96.sw.RHwave
- d96_1k.solo.mtn_rest_shear.olddamp
- d96_1k.solo.mtn_rest_shear
- d96_1k.solo.mtn_schar.mono
- d96_1k.solo.mtn_schar
- d96_2k.solo.bubble.n0
- d96_2k.solo.bubble.nhK
- d96_2k.solo.bubble
- d96_500m.solo.mtn_schar
steps:
- run: scancel -n $GITHUB_SHA
- env:
JOB: ${{ github.sha }}_${{ matrix.test }}
run: scancel -n $JOB