Skip to content

Commit

Permalink
add support for specifying slurm output file
Browse files Browse the repository at this point in the history
  • Loading branch information
cnrrobertson committed Dec 15, 2023
1 parent 0b0ee3d commit ffdd867
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 7 deletions.
30 changes: 24 additions & 6 deletions src/slurm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,37 @@ function launch(manager::SlurmManager, params::Dict, instances_arr::Array,
mkdir(job_file_loc)
end

# Check for given output file name
jobname = "julia-$(getpid())"
has_output_name = ("-o" in srunargs) | ("--output" in srunargs)
if has_output_name
loc = findfirst(x-> x == "-o", srunargs)
if isnothing(loc)
loc = findfirst(x-> x == "--output", srunargs)
end
job_output_name = srunargs[loc+1]
job_output_template = joinpath(job_file_loc, job_output_name)
srunargs[loc+1] = job_output_template
else
job_output_name = "$(jobname)-$(trunc(Int, Base.time() * 10))"
make_job_output_path(task_num) = joinpath(job_file_loc, "$(job_output_name)-$(task_num).out")
job_output_template = make_job_output_path("%4t")
append!(srunargs, "-o", job_output_template)
end

np = manager.np
jobname = "julia-$(getpid())"
job_output_name = "$(jobname)-$(trunc(Int, Base.time() * 10))"
make_job_output_path(task_num) = joinpath(job_file_loc, "$(job_output_name)-$(task_num).out")
job_output_template = make_job_output_path("%4t")
srun_cmd = `srun -J $jobname -n $np -o "$(job_output_template)" -D $exehome $(srunargs) $exename $exeflags $(worker_arg())`
srun_cmd = `srun -J $jobname -n $np -D $exehome $(srunargs) $exename $exeflags $(worker_arg())`
srun_proc = open(srun_cmd)
slurm_spec_regex = r"([\w]+):([\d]+)#(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})"
retry_delays = manager.retry_delays
for i = 0:np - 1
println("connecting to worker $(i + 1) out of $np")
slurm_spec_match = nothing
fn = make_job_output_path(lpad(i, 4, "0"))
if has_output_name
fn = job_output_template
else
fn = make_job_output_path(lpad(i, 4, "0"))
end
t0 = time()
for retry_delay in retry_delays
# Wait for output log to be created and populated, then parse
Expand Down
7 changes: 6 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,19 @@ end

# Slurm integration tests; they run only when "slurm" is passed in ARGS.
if "slurm" in ARGS
@testset "Slurm" begin
# NOTE(review): the commit page reports 1 deletion for this file, so the call on the
# next line looks like the pre-change line that the `o=out_file` call below replaced.
# Confirm against the real file: if both calls ran, two workers would be added and the
# `nprocs() == 2` check below would not hold.
p = addprocs_slurm(1)
# Explicit output file name handed to addprocs_slurm through the `o` keyword
# (presumably forwarded to srun as `-o` -- TODO confirm in the keyword handling).
out_file = "my_slurm_job.out"
p = addprocs_slurm(1; o=out_file)
# Expect the main process plus the single worker that was requested.
@test nprocs() == 2
@test workers() == p
# A task spawned on any worker must land on the only worker available.
@test fetch(@spawnat :any myid()) == p[1]
# Round-trip a trivial remote call to check the worker actually executes code.
@test remotecall_fetch(+,p[1],1,1) == 2
# Remove the worker again; only the main process (id 1) should remain.
rmprocs(p)
@test nprocs() == 1
@test workers() == [1]

# Check that the requested output file was created, then clean it up.
# NOTE(review): `isfile(out_file)` resolves against the current working directory,
# while `launch` joins the given name with `job_file_loc`; this presumes the two
# refer to the same directory -- confirm.
@test isfile(out_file)
rm(out_file)
end
end

Expand Down

0 comments on commit ffdd867

Please sign in to comment.