From 0368197001afad828d810edb40a02fc4515a3d8f Mon Sep 17 00:00:00 2001 From: Christian Meesters Date: Mon, 4 Nov 2024 12:44:06 +0100 Subject: [PATCH] fix: sbatch stderr parsing (#161) will hopefully fix #157 The issue is, that submission joined `stderr` and `stdout` of the `sbatch` call. Without add-ons `sbatch` only emits to `stdout` and to `stderr` only in the case of an error. However, admins can add informative messages to `stderr`, when this occurs, parsing the message for the JobID failed. Now, `stderr` and `stdout` are considered separately. ## Summary by CodeRabbit - **New Features** - Enhanced error handling during SLURM job submission, providing clearer feedback on failures. - Improved job ID retrieval by stripping whitespace from the output. - **Bug Fixes** - Addressed issues with job submission failures by capturing both standard output and error messages. - **Chores** - Minor adjustments to logging for better clarity during job submission and error reporting. --------- Co-authored-by: Filipe G. Vieira <1151762+fgvieira@users.noreply.github.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- snakemake_executor_plugin_slurm/__init__.py | 23 +++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py index 9b214c0..3e6349b 100644 --- a/snakemake_executor_plugin_slurm/__init__.py +++ b/snakemake_executor_plugin_slurm/__init__.py @@ -217,20 +217,35 @@ def run_job(self, job: JobExecutorInterface): self.logger.debug(f"sbatch call: {call}") try: - out = subprocess.check_output( - call, shell=True, text=True, stderr=subprocess.STDOUT - ).strip() + process = subprocess.Popen( + call, + shell=True, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + out, err = process.communicate() + if process.returncode != 0: + raise subprocess.CalledProcessError( + process.returncode, call, output=err + ) except subprocess.CalledProcessError as e: raise WorkflowError( f"SLURM job submission failed. The error message was {e.output}" ) + if err: # any other error message? + raise WorkflowError( + f"SLURM job submission failed. The error message was {err}" + ) # multicluster submissions yield submission infos like # "Submitted batch job on cluster " by default, but with the # --parsable option it simply yields ";". # To extract the job id we split by semicolon and take the first element # (this also works if no cluster name was provided) - slurm_jobid = out.split(";")[0] + slurm_jobid = out.strip().split(";")[0] + if not slurm_jobid: + raise WorkflowError("Failed to retrieve SLURM job ID from sbatch output.") slurm_logfile = slurm_logfile.replace("%j", slurm_jobid) self.logger.info( f"Job {job.jobid} has been submitted with SLURM jobid {slurm_jobid} "