From 85c01cbc66ceb0cab43ce4f3575695537c211fca Mon Sep 17 00:00:00 2001 From: "katarzyna.otylia.sikora@gmail.com" Date: Tue, 21 Jan 2025 13:37:30 +0100 Subject: [PATCH] predict chip dict working --- pyproject.toml | 3 +- snakePipes/common_functions.py | 30 +++++++++++++------ snakePipes/snakePipes.py | 4 +-- .../workflows/ChIPseq/internals.snakefile | 2 +- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 69d09d8f4..cc844fbe7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,8 @@ dependencies = [ "snakemake >= 8", "pandas", "thefuzz", - "pyyaml >= 5.1", +# "pyyaml >= 5.1", + "ruamel.yaml", "snakemake-executor-plugin-cluster-generic >= 1.0.9", "graphviz" ] diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py index a9f008867..67a527982 100644 --- a/snakePipes/common_functions.py +++ b/snakePipes/common_functions.py @@ -5,7 +5,8 @@ import subprocess import os import re -import yaml +#import yaml +from ruamel.yaml import YAML import glob import sys import shutil @@ -88,8 +89,10 @@ def namesOKinR(sampleNames): def load_configfile(configFiles, verbose, info='Config'): + yaml=YAML(typ='safe') with open(configFiles, "r") as f: - config = yaml.load(f, Loader=yaml.FullLoader) + #config = yaml.load(f, Loader=yaml.FullLoader) + config = yaml.load(f) config = sanity_dict_clean(config) @@ -102,9 +105,15 @@ def load_configfile(configFiles, verbose, info='Config'): return config -def write_configfile(configFile, config): +def write_configfile(configFile, config, trafo): + yaml=YAML(typ='safe') + yaml.default_flow_style = False with open(configFile, 'w') as f: - yaml.dump(config, f, default_flow_style=False) + #yaml.dump(config, f, default_flow_style=False) + if trafo: + yaml.dump(config, f, transform=trafo) + else: + yaml.dump(config, f) # returns all key-value pairs that are different from dict1 to dict2 @@ -632,7 +641,7 @@ def commonYAMLandLogs(baseDir, workflowDir, defaults, args, callingScript): # save to configs.yaml in outdir config = defaults config.update(vars(args)) # This allows modifications of args after handling a user config file to still make it to the YAML given to snakemake! - write_configfile(os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), config) + write_configfile(os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), config, trafo=None) # merge cluster config files: 1) global one, 2) workflow specific one, 3) user provided one cfg = load_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), False, "defaults") @@ -719,7 +728,7 @@ def print_DAG(args, snakemake_cmd, callingScript, defaults): config['verbose'] = False write_configfile( os.path.join(args.outdir, - '{}.config.yaml'.format(workflowName)), config) + '{}.config.yaml'.format(workflowName)), config, trafo=None) DAGproc = subprocess.Popen( snakemake_cmd + " --rulegraph -q ", @@ -734,7 +743,7 @@ def print_DAG(args, snakemake_cmd, callingScript, defaults): config['verbose'] = oldVerbose write_configfile( os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), - config) + config, trafo=None) def logAndExport(args, workflowName): @@ -794,6 +803,9 @@ def runAndCleanup(args, cmd, logfile_name): if args.emailAddress: sendEmail(args, 0) +def tr(s): + return s.replace('null', 'None') + def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): """ @@ -856,14 +868,14 @@ def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): print("No control sample found!") chip_dict_pred["chip_dict"][i] = {} - chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None + chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None if re.match(".*(H3K4me1|H3K36me3|H3K9me3|H3K27me3).*", i, re.IGNORECASE): chip_dict_pred["chip_dict"][i]['Broad'] = True else: chip_dict_pred["chip_dict"][i]['Broad'] = False outfile = os.path.join(wdir, "chip_seq_sample_config.PREDICTED.yaml") - write_configfile(outfile, chip_dict_pred) + write_configfile(outfile, chip_dict_pred,trafo=tr) print("---------------------------------------------------------------------------------------") print("ChIPseq sample configuration is written to file ", outfile) print("Please check and modify this file - this is just a guess! Then run the workflow with it.") diff --git a/snakePipes/snakePipes.py b/snakePipes/snakePipes.py index 7240ad91d..82ae42c65 100755 --- a/snakePipes/snakePipes.py +++ b/snakePipes/snakePipes.py @@ -414,7 +414,7 @@ def updateConfig(args): else: sys.exit("Config file not found\n") updatedDict = cof.merge_dicts(currentDict, d) - cof.write_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), updatedDict) + cof.write_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), updatedDict, trafo=None) #update conda-prefix in snakemakeProfile if args.condaEnvDir: @@ -422,7 +422,7 @@ def updateConfig(args): f = open(profilePath / 'config.yaml') pf = yaml.load(f, Loader=yaml.FullLoader) pf['conda-prefix'] = args.condaEnvDir - cof.write_configfile(os.path.join(profilePath, "config.yaml"), pf) + cof.write_configfile(os.path.join(profilePath, "config.yaml"), pf, trafo=None) f.close() cof.load_configfile( diff --git a/snakePipes/workflows/ChIPseq/internals.snakefile b/snakePipes/workflows/ChIPseq/internals.snakefile index 8c8eae6f5..b5b842ec6 100755 --- a/snakePipes/workflows/ChIPseq/internals.snakefile +++ b/snakePipes/workflows/ChIPseq/internals.snakefile @@ -104,7 +104,7 @@ with open(samples_config, "r") as f: exit(1) del chip_dict_tmp -cf.write_configfile(os.path.join("chip_samples.yaml"), chip_dict) +cf.write_configfile(os.path.join("chip_samples.yaml"), chip_dict, trafo=None) # create unique sets of control samples, ChIP samples with and without control control_samples = set()