From 616673d7408c9b215c82500e4c4448f150270edc Mon Sep 17 00:00:00 2001 From: Iuri Oksuzian Date: Thu, 9 Jan 2025 14:26:07 -0600 Subject: [PATCH 1/4] Various additions to production scripts --- CampaignConfig/mdc2020_digireco.cfg | 49 +++++++++ CampaignConfig/mdc2020_prolog.cfg | 22 +++- CampaignConfig/wideband_crv.cfg | 49 +++++---- CampaignConfig/wideband_crv.ini | 4 +- ProjPy/gen_Campaigns.py | 25 ++--- ProjPy/mdc2020_mixing.ini | 18 ++-- ProjPy/mdc2020_reco.ini | 3 +- Scripts/gen_Merge.sh | 19 ++++ Scripts/inspect_datasets.py | 100 +++++++++++------ Scripts/run_RecoEntuple.py | 162 ++++++++++++++++++++++++++++ Scripts/run_mu2ejobfcl.py | 63 +++++++++++ data/mix_cosmic.json | 7 +- 12 files changed, 436 insertions(+), 85 deletions(-) create mode 100755 Scripts/run_RecoEntuple.py create mode 100755 Scripts/run_mu2ejobfcl.py diff --git a/CampaignConfig/mdc2020_digireco.cfg b/CampaignConfig/mdc2020_digireco.cfg index f7a23114..e74bf68e 100644 --- a/CampaignConfig/mdc2020_digireco.cfg +++ b/CampaignConfig/mdc2020_digireco.cfg @@ -100,3 +100,52 @@ executable_2.name = run_JITfcl.py submit.dataset = %(index_dataset)s submit.n_files_per_job = 1 job_setup.multifile = True + +[stage_digireco_reco_list] +global.release_v_o = an + +#submit.f_2=dropbox:///exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/run_RecoEntuple.py +#job_setup.prescript_14 = chmod +x ${CONDOR_DIR_INPUT}/* +#executable_4.name = \\\\\\\$CONDOR_DIR_INPUT/run_RecoEntuple.py +executable_4.name = run_RecoEntuple.py +executable_4.arg_1 = --rver %(release_v_o)s +submit.n_files_per_job = 1 +job_setup.multifile = True +job_setup.setup = OfflineOps +job_setup.setup_1 = mu2etools +env_pass.MOO_CONFIG = simjob-mdc2020 + +# We don't use etc.*.txt dataset to extract the index +job_setup.postscript_4 = echo "Ignoring index dataset" +job_output_1.addoutput = *.xxx +job_output_2.addoutput = *.xxx +job_output_3.addoutput = *.xxx +job_output_4.addoutput = *.xxx +job_output_5.addoutput = *.xxx +job_output_6.addoutput = *.xxx +job_output_7.addoutput = *.xxx +job_output_8.addoutput = *.xxx + +[stage_digireco_evntuple_list] +global.simjob_setup=/cvmfs/mu2e.opensciencegrid.org/Musings/EventNtuple/%(version_entpl)s/setup.sh +#submit.f_2=dropbox:///exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/run_RecoEntuple.py +#job_setup.prescript_14 = chmod +x ${CONDOR_DIR_INPUT}/* +executable_4.name = run_RecoEntuple.py +executable_4.arg_1 = --version_reco %(release_v_o)s +executable_4.arg_2 = --version_entpl %(version_entpl)s + +submit.n_files_per_job = 1 +sam_consumer.limit = 1 +job_setup.multifile = True +env_pass.MOO_CONFIG = simjob-mdc2020 + +# We don't use etc.*.txt dataset to extract the index +job_setup.postscript_4 = echo "Ignoring index dataset" +job_output_1.addoutput = *.xxx +job_output_2.addoutput = *.xxx +job_output_3.addoutput = *.xxx +job_output_4.addoutput = *.xxx +job_output_5.addoutput = *.xxx +job_output_6.addoutput = *.xxx +job_output_7.addoutput = *.xxx +job_output_8.addoutput = *.xxx diff --git a/CampaignConfig/mdc2020_prolog.cfg b/CampaignConfig/mdc2020_prolog.cfg index e8f8aee5..69d31aae 100644 --- a/CampaignConfig/mdc2020_prolog.cfg +++ b/CampaignConfig/mdc2020_prolog.cfg @@ -66,7 +66,8 @@ timeout = 23h #Allows to overwrite incomplete output from failed jobs. #cnf*tar files can't be overwritten need-storage-modify_0 = /mu2e/tape -need-storage-modify_1 = /mu2e/persistent/datasets/phy-etc/bck +need-storage-modify_1 = /mu2e/scratch +need-storage-modify_2 = /mu2e/persistent/datasets/phy-etc/bck memory = 2500MB disk =20GB maxConcurrent = 5000 @@ -85,6 +86,9 @@ source_2 = ${MUSE_DIR}/bin/muse setup ops source_3 = /cvmfs/mu2e.opensciencegrid.org/bin/OfflineOps/functions.sh source_4 = %(simjob_setup)s +setup = OfflineOps +setup_1 = mu2etools + ifdh_art = False postscript = [ -f template.fcl ] && rm template.fcl postscript_2 = [[ $(ls *.{art,root} 2>/dev/null) ]] && samweb file-lineage parents `basename ${fname}` > parents.txt @@ -148,6 +152,16 @@ add_to_dataset = mcs.%(submitter)s.%(desc)sTriggered.%(dsconf)s.art dest = %(outdir_mcs_tape)s/%(desc)sTriggered/%(dsconf)s/art [job_output_5] +addoutput = mcs.%(submitter)s.%(desc)s.%(dsconf)s.*.art +declare_metadata = True +metadata_extractor = printJsonSave.sh +add_location = True +hash = 2 +hash_alg = sha256 +add_to_dataset = mcs.%(submitter)s.%(desc)s.%(dsconf)s.art +dest = %(outdir_mcs_tape)s/%(desc)s/%(dsconf)s/art + +[job_output_6] addoutput = dts.%(submitter)s.%(desc)s.%(dsconf)s.*.art declare_metadata = True metadata_extractor = printJsonSave.sh @@ -157,7 +171,7 @@ hash_alg = sha256 add_to_dataset = dts.%(submitter)s.%(desc)s.%(dsconf)s.art dest = %(outdir_dts_tape)s/%(desc)s/%(dsconf)s/art -[job_output_6] +[job_output_7] addoutput = sim.%(submitter)s.%(desc)s.%(dsconf)s*.art declare_metadata = True metadata_extractor = printJsonSave.sh @@ -167,7 +181,7 @@ hash_alg = sha256 add_to_dataset = sim.%(submitter)s.%(desc)s.%(dsconf)s.art dest = %(outdir_sim_tape)s/%(desc)s/%(dsconf)s/art -[job_output_7] +[job_output_8] addoutput = cnf*.tar add_to_dataset = cnf.%(submitter)s.%(desc)s.%(dsconf)s.tar declare_metadata = True @@ -178,7 +192,7 @@ hash = 2 hash_alg = sha256 dest = %(outdir_tar)s/%(desc)s/%(dsconf)s/tar/ -[job_output_8] +[job_output_9] addoutput = nts.*.root add_to_dataset = nts.%(submitter)s.%(desc)s.%(dsconf)s.root declare_metadata = True diff --git a/CampaignConfig/wideband_crv.cfg b/CampaignConfig/wideband_crv.cfg index 41b1a7e4..a78ac959 100644 --- a/CampaignConfig/wideband_crv.cfg +++ b/CampaignConfig/wideband_crv.cfg @@ -1,13 +1,12 @@ [global] -#includes = /cvmfs/mu2e.opensciencegrid.org/Musings/SimJob/%(release)s%(release_v_o)s/Production/CampaignConfig/mdc2020_prolog.cfg includes = /exp/mu2e/app/users/oksuzian/muse_080224/Production/CampaignConfig/mdc2020_prolog.cfg -stage_name_prefix = WidebandCRV_v19 -desc = %(release)s%(release_v_o)s +desc_prefix = WidebandCRV_v30 +dsconf = %(release)s%(release_v_o)s run_number = 103001 release = MDC2020 -release_v_o = aj -num_jobs = 1000 +release_v_o = an +num_jobs = 2000 num_events = 100000 [stage_test] @@ -18,17 +17,21 @@ executable_4.name = printenv job_setup.prescript = printenv [stage_generation_par] -global.stage_name = %(stage_name_prefix)s +global.desc = %(desc_prefix)s job_setup.prescript = echo '#include "Offline/CRVResponse/test/wideband/wideband4modules.fcl"' > template.fcl -job_setup.prescript_2 = echo 'outputs.Output.fileName : "mcs.owner.%(stage_name)s.configuration.sequencer.art"' >> template.fcl -job_setup.prescript_3 = echo 'services.TFileService.fileName : "nts.owner.%(stage_name)s.configuration.sequencer.root"' >> template.fcl +job_setup.prescript_2 = echo 'outputs.Output.fileName : "mcs.owner.%(desc)s.configuration.sequencer.art"' >> template.fcl +job_setup.prescript_3 = echo 'services.TFileService.fileName : "nts.owner.%(desc)s.configuration.sequencer.root"' >> template.fcl +job_setup.prescript_4 = echo 'source.module_type: EmptyEvent' >> template.fcl +job_setup.prescript_5 = ifdh mkdir_p %(outdir_tar)s/%(desc)s/%(dsconf)s/tar/ +job_setup.prescript_6 = echo 'physics.producers.CrvPhotons.photonYieldScaleFactor : 0.89' >> template.fcl +job_setup.prescript_7 = echo 'physics.analyzers.CrvWidebandTest.SelectEvents : [ TriggerPath ]' >> template.fcl +job_setup.prescript_8 = echo 'physics.producers.g4run.physics.minRangeCut : 10.' >> template.fcl +job_setup.prescript_9 = echo 'services.GeometryService.inputFile: "geom_Wideband4modules_v22.txt"' >> template.fcl + #job_setup.prescript_4 = echo 'physics.producers.generate.inputFile : "Production/JobConfig/cosmic/wideband/widebandCRYconfig_v1.txt"' >> template.fcl #job_setup.prescript_4 = echo 'physics.producers.generate.inputFile : "widebandCRYconfig_v6.txt"' >> template.fcl #job_setup.prescript_5 = echo 'services.GeometryService.inputFile: "geom_Wideband4modules_v18.txt"' >> template.fcl -job_setup.prescript_8 = echo 'source.module_type: EmptyEvent' >> template.fcl -job_setup.prescript_6 = ifdh mkdir_p %(outdir_tar)s/%(stage_name)s/%(desc)s/tar/ -job_setup.prescript_7 = echo 'physics.producers.CrvPhotons.photonYieldScaleFactor : 1.01' >> template.fcl executable.name = true executable_2.name = gen_S1.sh @@ -37,7 +40,7 @@ executable_2.arg_2 = -v %(release_v_o)s executable_2.arg_3 = -o mu2e executable_2.arg_4 = -s %(simjob_release)s executable_2.arg_5 = -f template.fcl -executable_2.arg_6 = -d %(stage_name)s +executable_2.arg_6 = -d %(desc)s executable_2.arg_7 = -j %(num_jobs)s executable_2.arg_8 = -e %(num_events)s executable_2.arg_9 = -r %(run_number)s @@ -45,33 +48,33 @@ executable_2.arg_9 = -r %(run_number)s job_setup.ifdh_art = False [stage_generation] -global.stage_name = %(stage_name_prefix)s +global.desc = %(desc_prefix)s global.upload_parfile = True submit.f_1 = dropbox:////tmp/%(parfile)s submit.dataset = %(index_dataset)s submit.n_files_per_job = 1 -job_output_1.dest = %(logdir_bck)s/%(stage_name)s/%(desc)s/tbz/ +job_output_1.dest = %(logdir_bck)s/%(desc)s/%(dsconf)s/tbz/ -job_output_2.dest = %(outdir_mcs_tape)s/%(stage_name)s/%(desc)s/art/ -job_output_2.add_to_dataset = mcs.%(submitter)s.%(stage_name)s.%(desc)s.art +job_output_2.dest = %(outdir_mcs_tape)s/%(desc)s/%(dsconf)s/art/ +job_output_2.add_to_dataset = mcs.%(submitter)s.%(desc)s.%(dsconf)s.art job_output_3.addoutput = nts.*.root -job_output_3.dest = %(outdir_nts_tape)s/%(stage_name)s/%(desc)s/root/ -job_output_3.add_to_dataset = nts.%(submitter)s.%(stage_name)s.%(desc)s.root +job_output_3.dest = %(outdir_nts_tape)s/%(desc)s/%(dsconf)s/root/ +job_output_3.add_to_dataset = nts.%(submitter)s.%(desc)s.%(dsconf)s.root -submit.f_2=dropbox:///exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/run_JITfcl.py -job_setup.prescript_14 = chmod +x ${CONDOR_DIR_INPUT}/* +#submit.f_2=dropbox:///exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/run_JITfcl.py +#job_setup.prescript_14 = chmod +x ${CONDOR_DIR_INPUT}/* -#submit.f_3=dropbox:///exp/mu2e/data/users/oksuzian/poms_upload/geom_Wideband4modules_v18.txt +submit.f_3=dropbox:///exp/mu2e/data/users/oksuzian/poms_upload/geom_Wideband4modules_v22.txt #submit.f_4=dropbox:///exp/mu2e/data/users/oksuzian/poms_upload/widebandCRYconfig_v6.txt job_setup.prescript_2=cp $CONDOR_DIR_INPUT/*.txt . job_setup.prescript_3=export MU2E_SEARCH_PATH=$MU2E_SEARCH_PATH:$PWD executable.name = true -executable_4.name = \\\\\\\$CONDOR_DIR_INPUT/run_JITfcl.py -#executable_4.name = run_JITfcl.py +#executable_4.name = \\\\\\\$CONDOR_DIR_INPUT/run_JITfcl.py +executable_4.name = run_JITfcl.py job_setup.getconfig = False job_setup.multifile = True diff --git a/CampaignConfig/wideband_crv.ini b/CampaignConfig/wideband_crv.ini index c73d449b..6b724e45 100644 --- a/CampaignConfig/wideband_crv.ini +++ b/CampaignConfig/wideband_crv.ini @@ -6,8 +6,8 @@ campaign_stage_list = test, generation_par, generation [campaign_defaults] vo_role=Production -campaign_keywords={"release_v_o": "aj", - "simjob_release": "/cvmfs/mu2e.opensciencegrid.org/Musings/SimJob/MDC2020aj/setup.sh"} +campaign_keywords={"release_v_o": "an", + "simjob_release": "/cvmfs/mu2e.opensciencegrid.org/Musings/SimJob/MDC2020an/setup.sh"} software_version=mdc2020 dataset_or_split_data=None diff --git a/ProjPy/gen_Campaigns.py b/ProjPy/gen_Campaigns.py index 1784070a..1d1c268a 100755 --- a/ProjPy/gen_Campaigns.py +++ b/ProjPy/gen_Campaigns.py @@ -1,9 +1,9 @@ #!/usr/bin/env python #Script to create and/or submit multiple campaign using Project-py -#Create ini files: ./ProjPy/gen_Campaigns.py --ini_file ProjPy/mdc2020_mixing.ini --cfg_file CampaignConfig/mdc2020_digireco.cfg --comb_json data/mix.json --simjob MDC2020ae -#Create ini files: ./ProjPy/gen_Campaigns.py --ini_file ProjPy/mdc2020_primary.ini --cfg_file CampaignConfig/mdc2020_primary.cfg --comb_json data/primary.json --simjob MDC2020ae --comb_type list --cutoff_key primary_name -#Create, upload and submit all campaign: ./ProjPy/gen_Campaigns.py --ini_file ProjPy/mdc2020_mixing.ini --cfg_file CampaignConfig/mdc2020_digireco.cfg --comb_json data/mix.json --simjob MDC2020ae --create_campaign --submit +#Create ini files: ./ProjPy/gen_Campaigns.py --ini_file ProjPy/mdc2020_mixing.in --comb_json data/mix.json --simjob MDC2020ae +#Create ini files: ./ProjPy/gen_Campaigns.py --ini_file ProjPy/mdc2020_primary.ini --comb_json data/primary.json --simjob MDC2020ae --comb_type list --cutoff_key primary_name +#Create, upload and submit all campaign: ./ProjPy/gen_Campaigns.py --ini_file ProjPy/mdc2020_mixing.ini --cfg_file CampaignConfig/mdc2020_digireco.cfg --comb_json data/mix.json --simjob MDC2020ae --upload --submit #To upload setup poms-client first, delete voms-proxy (NOT as mu2epro!!!), get kx509: #source /cvmfs/fermilab.opensciencegrid.org/packages/common/setup-env.sh @@ -19,27 +19,25 @@ parser = argparse.ArgumentParser(description="Script to submit multiple POMS campaigns through project_py") requiredNamed = parser.add_argument_group('required arguments') requiredNamed.add_argument("--ini_file", type=str, help="INI file", required=True) -requiredNamed.add_argument("--cfg_file", type=str, help="CFG file", required=True) -requiredNamed.add_argument("--simjob", type=str, help="SimJob version, i.e. MDC2020ae", required=True) requiredNamed.add_argument("--comb_json", type=str, help="JSON file that contains combinations to run over", required=True) requiredNamed.add_argument("--comb_type", type=str, help="JSON file type: list or product", required=True) -requiredNamed.add_argument("--cutoff_key", type=str, help="Ignore keys in the campaign name after the cutoff_key", default=None) -parser.add_argument("--create_campaign", action="store_true", help="Create campaigns") +parser.add_argument("--cutoff_key", type=str, help="Ignore keys in the campaign name after the cutoff_key", default=None) +parser.add_argument("--simjob", type=str, help="SimJob version, i.e. MDC2020", default="MDC2020") +parser.add_argument("--upload", action="store_true", help="Create campaigns") parser.add_argument("--submit", action="store_true", help="Submit campaigns") parser.add_argument("--test_run", action="store_true", help="Run in test run mode") parser.add_argument("--ini_version", default="", type=str, help="Append version to the end of campaign name, i.e. _v1") args = parser.parse_args() ini_file = args.ini_file -cfg_file = args.cfg_file simjob = args.simjob comb_json = args.comb_json comb_type = args.comb_type cutoff_key = args.cutoff_key ini_version = args.ini_version -create_campaign = args.create_campaign +upload = args.upload submit = args.submit test_run = args.test_run ini_version = args.ini_version @@ -65,12 +63,15 @@ list_keys = list(value.keys()) value = list(value.values()) + # Drop setup.sh from the campaign/file name + campain_name_list = [item for item in value if 'setup.sh' not in item] + # We use only keys that appear prior to cutoff_key (i.e "primary_name"), and ignore the rest in the campaign/file name if cutoff_key is not None: cutoff_key_index = list_keys.index(cutoff_key) + 1 - campaign_name = f"{simjob}_{'_'.join(map(str, value[:cutoff_key_index]))}{ini_version}" + campaign_name = f"{simjob}_{'_'.join(map(str, campain_name_list[:cutoff_key_index]))}{ini_version}" else: - campaign_name = f"{simjob}_{'_'.join(map(str, value))}{ini_version}" + campaign_name = f"{simjob}_{'_'.join(map(str, campain_name_list))}{ini_version}" out_ini_file = f"{campaign_name}.ini" os.system(f"cp {ini_file} {out_ini_file}") @@ -86,7 +87,7 @@ with open(out_ini_file, 'w') as file: file.write(file_data) - if create_campaign: + if upload: cmd=f"upload_wf --poms_role production {out_ini_file}" print(cmd) os.system(cmd) diff --git a/ProjPy/mdc2020_mixing.ini b/ProjPy/mdc2020_mixing.ini index c4a2f3b3..d07ec907 100644 --- a/ProjPy/mdc2020_mixing.ini +++ b/ProjPy/mdc2020_mixing.ini @@ -6,16 +6,15 @@ campaign_stage_list = mix_par, mix, reco_par, reco [campaign_defaults] vo_role=Production -campaign_keywords={"release_v_dig": "am", - "release_v_rec": "am", +campaign_keywords={"release_v_dig": "override_me", + "release_v_rec": "override_me", "release_v_o": "override_me", "release_v_stops": "p", "release_v_dts": "override_me", "primary_name": "override_me", "digitype": "override_me", "db_purpose": "override_me", - "stream": "Triggered", - "simjob_setup": "/cvmfs/mu2e.opensciencegrid.org/Musings/SimJob/MDC2020am/setup.sh"} + "stream": "Triggered"} software_version=mdc2020 dataset_or_split_data=None @@ -30,14 +29,13 @@ param_overrides = [["-Oglobal.release_v_dig=","%(release_v_dig)s"], ["-Oglobal.primary_name=","%(primary_name)s"], ["-Oglobal.digitype=","%(digitype)s"], ["-Oglobal.db_purpose=","%(db_purpose)s"], - ["-Oglobal.stream=","%(stream)s"], - ["-Oglobal.simjob_setup=","%(simjob_setup)s"]] + ["-Oglobal.stream=","%(stream)s"]] -test_param_overrides = [] +test_param_overrides = param_overrides merge_overrides=True login_setup=mu2epro_login_v11 -job_type=mu2epro_jobtype_mdc2020am +job_type=mu2epro_jobtype_mdc2020an stage_type=regular output_ancestor_depth=1 @@ -48,7 +46,7 @@ test_param_overrides = [["--stage ", "mix_par"]] [campaign_stage mix] param_overrides = [["--stage ", "digireco_mix"]] -test_param_overrides = [["--stage ", "digireco_mix"]] +test_param_overrides = [["--stage ", "digireco_mix"], ["-Oglobal.dataset=", "idx_test"]] [campaign_stage reco_par] param_overrides = [["--stage ", "digireco_reco_par"]] @@ -56,7 +54,7 @@ test_param_overrides = [["--stage ", "digireco_reco_par"]] [campaign_stage reco] param_overrides = [["--stage ", "digireco_reco"]] -test_param_overrides = [["--stage ", "digireco_reco"]] +test_param_overrides = [["--stage ", "digireco_reco"], ["-Oglobal.dataset=", "idx_test"]] [dependencies mix] campaign_stage_1 = mix_par diff --git a/ProjPy/mdc2020_reco.ini b/ProjPy/mdc2020_reco.ini index c893503f..c05c92d0 100644 --- a/ProjPy/mdc2020_reco.ini +++ b/ProjPy/mdc2020_reco.ini @@ -11,7 +11,7 @@ campaign_keywords={"release_v_dig": "override_me", "primary_name": "override_me", "digitype": "override_me", "db_purpose": "override_me", - "stream": "Triggered", + "stream": "Triggered", "simjob_setup": "override_me"} software_version=mdc2020 @@ -21,6 +21,7 @@ completion_type=located completion_pct=100 param_overrides = [["-Oglobal.release_v_dig=","%(release_v_dig)s"], ["-Oglobal.release_v_rec=","%(release_v_rec)s"], + ["-Oglobal.release_v_rec=","%(release_v_o)s"], ["-Oglobal.primary_name=","%(primary_name)s"], ["-Oglobal.digitype=","%(digitype)s"], ["-Oglobal.db_purpose=","%(db_purpose)s"], diff --git a/Scripts/gen_Merge.sh b/Scripts/gen_Merge.sh index 1cc63a3e..1cb96c68 100755 --- a/Scripts/gen_Merge.sh +++ b/Scripts/gen_Merge.sh @@ -55,6 +55,9 @@ while getopts ":-:" options; do prod) PROD=${!OPTIND} OPTIND=$(( $OPTIND + 1 )) ;; + dataset) + DATASET=${!OPTIND} OPTIND=$(( $OPTIND + 1 )) + ;; esac;; :) # If expected argument omitted: echo "Error: -${OPTARG} requires an argument." @@ -67,6 +70,22 @@ while getopts ":-:" options; do done +# If dataset is provided, parse it to extract DESC, DS_CONF, DS_OWNER +if [ -n "$DATASET" ]; then + # Expected format: mcs.mu2e...art + IFS='.' read -r PREFIX OWNER DESC_EXTRACT DS_CONF_EXTRACT SUFFIX <<< "$DATASET" + + # If the dataset follows the pattern: + # mcs.mu2e.CosmicCORSIKASignalAllOnSpillTriggered.MDC2020am_perfect_v1_3.art + # Then: + # DESC = CosmicCORSIKASignalAllOnSpillTriggered + # DS_CONF = MDC2020am_perfect_v1_3 + + [ -z "$DESC" ] && DESC="$DESC_EXTRACT" + [ -z "$DS_CONF" ] && DS_CONF="$DS_CONF_EXTRACT" +fi + + # Run the merge command with the specified options echo "Running mu2ejobdef command with the following options:" echo " Embed file: $EMBED_FILE" diff --git a/Scripts/inspect_datasets.py b/Scripts/inspect_datasets.py index 033999f9..02c6bcd8 100755 --- a/Scripts/inspect_datasets.py +++ b/Scripts/inspect_datasets.py @@ -14,35 +14,43 @@ from collections import Counter from io import StringIO import contextlib -#import pandas as pd +import pandas as pd import re import subprocess import json +import os +from datetime import datetime -def analyze_dataset(cli, dataset): +def analyze_dataset(cli, dataset, skip_dcache_status): - # Run mdh query-dcache with online status - output = StringIO() - with contextlib.redirect_stdout(output): - cli.run(['query-dcache', '-o', dataset]) + counts = {} + + if not skip_dcache_status: + try: + # Run mdh query-dcache with online status + output = StringIO() + with contextlib.redirect_stdout(output): + cli.run(['query-dcache', '-o', dataset]) - # Extract statuses from the output - statuses = re.findall(r'(NEARLINE|ONLINE_AND_NEARLINE)', output.getvalue()) - counts = Counter(statuses) + # Extract statuses from the output + statuses = re.findall(r'(NEARLINE|ONLINE_AND_NEARLINE)', output.getvalue()) + counts = Counter(statuses) + except RuntimeError as e: + print ("Bad dataset - please fix") # Get file info including size and rse.nevent using metacat command try: - cmd = [ - 'metacat', 'query', - '-m', 'all', '-j', - 'files', 'from', 'mu2e:' + dataset - ] - metacat_output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True) + + cmd = f'metacat query -m all -j files from mu2e:{dataset}' + print("cmd: %s"%cmd) + metacat_output = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT, universal_newlines=True) file_list = json.loads(metacat_output) total_size_int = 0 - total_rse_nevent = 0 + total_nevent = 0 + total_files = len(file_list) + for file_info in file_list: # Sum up sizes size = file_info.get('size', 0) @@ -50,8 +58,8 @@ def analyze_dataset(cli, dataset): # Sum up rse.nevent values metadata = file_info.get('metadata', {}) - rse_nevent = metadata.get('rse.nevent', 0) - total_rse_nevent += rse_nevent + nevent = metadata.get('rse.nevent', 0) + total_nevent += nevent except subprocess.CalledProcessError as e: print(f"Error running metacat command for dataset '{dataset}': {e.output}") @@ -60,18 +68,29 @@ def analyze_dataset(cli, dataset): return { 'dataset': dataset, - 'NEARLINE': counts.get('NEARLINE', 0), - 'ONLINE_AND_NEARLINE': counts.get('ONLINE_AND_NEARLINE', 0), - 'Total size': total_size_int, - 'Total events': total_rse_nevent + 'NEARLINE': counts.get('NEARLINE', 0) if not skip_dcache_status else None, + 'ONLINE_AND_NEARLINE': counts.get('ONLINE_AND_NEARLINE', 0) if not skip_dcache_status else None, + 'Total size': round(total_size_int / (1024 ** 3), 2), # Convert bytes to GB + 'Total events': total_nevent, + 'Total files': total_files # Include the total number of files } def main(): - # Get input file from command line - if len(sys.argv) != 2: - print("Usage: ./inspect_datasets.py input_file") - sys.exit(1) - input_file = sys.argv[1] + + # Parse command-line arguments + parser = argparse.ArgumentParser(description="Analyze datasets and save results to a pandas DataFrame.") + parser.add_argument("--input-file", help="Input file containing dataset names") + parser.add_argument("--output-csv-folder", default="/exp/mu2e/app/home/mu2epro/cron/datasetMon/csv", + help="Path to the folder where output CSV files will be saved.") + parser.add_argument("--output-html-folder", default="/web/sites/mu2e.fnal.gov/htdocs/atwork/computing/ops/datasetMon", + help="Path to the folder where output HTML files will be saved.") + parser.add_argument("--skip-dcache-status", action="store_true", help="Skip querying dCache status.") + args = parser.parse_args() + + # Use args.input_file for input file + input_file = args.input_file + output_csv_folder = args.output_csv_folder + output_html_folder = args.output_html_folder # Create MdhCli instance cli = mdh_cli.MdhCli() @@ -82,15 +101,34 @@ def main(): datasets = [line.strip() for line in f if line.strip() and not line.strip().startswith('#')] for dataset in datasets: - result = analyze_dataset(cli, dataset) + result = analyze_dataset(cli, dataset, args.skip_dcache_status) results.append(result) print(result) + + # Get the current date in YYYY-MM-DD format + current_date = datetime.now().strftime('%Y-%m-%d') + # Create and display DataFrame -# df = pd.DataFrame(results) -# print(df) + df = pd.DataFrame(results) + df = df.sort_values(by='dataset', ascending=True, ignore_index=True) + # Add the current date as a new column to the DataFrame + df['date'] = current_date + print(df) + + # Derive HTML file name from input_file + base_name = os.path.splitext(os.path.basename(input_file))[0] + html_file = f"{base_name}_results.html" + # Calculate the sum of columns + sum_row = df.select_dtypes(include='number').sum() + sum_row['dataset'] = 'TOTAL' + df = pd.concat([df, sum_row.to_frame().T], ignore_index=True) + + df.to_html('%s/%s.html'%(output_html_folder, base_name), index=False) + print(f"HTML file '{html_file}' has been created.") + # Optionally save to CSV - # df.to_csv('dataset_results.csv', index=False) + df.to_csv('%s/%s_%s.csv'%(output_csv_folder, base_name, current_date), index=False) if __name__ == "__main__": main() diff --git a/Scripts/run_RecoEntuple.py b/Scripts/run_RecoEntuple.py new file mode 100755 index 00000000..c18949cb --- /dev/null +++ b/Scripts/run_RecoEntuple.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +import re +import os +import sys +import logging +import argparse +from pathlib import Path +import subprocess +import textwrap + +# --------------------------------------------------- +# Configure Logging to stdout +# --------------------------------------------------- +logger = logging.getLogger() +logger.handlers = [] +logger.setLevel(logging.INFO) + +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.INFO) +formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') +handler.setFormatter(formatter) +logger.addHandler(handler) + +def parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser(description='Process digi files with specified MDC release') + parser.add_argument('--version_reco', required=True, + help='Output reco version (e.g., an)') + parser.add_argument('--version_entpl', required=False, default="", + help='Output evntuple version (e.g., v06_01_00)') + parser.add_argument('--nevents', type=int, default=-1, + help='Number of events to process (default: -1)') + parser.add_argument('--location', type=str, default='tape', + help='Location identifier to include in output.txt (default: "tape")') + parser.add_argument('--dry-run', action='store_true', + help='Print commands without actually running them') + return parser.parse_args() + +def run_command(command: str) -> str: + """Run shell command and handle output""" + logging.info(f"Running: {command}") + result = subprocess.run(command, shell=True, check=False, capture_output=True, text=True) + if result.stdout: + logging.info(result.stdout) + if result.returncode != 0: + logging.error(f"Error running command: {command}") + logging.error(result.stderr) + sys.exit(1) + return result.stdout + +def process_filename(fname: str, version_reco: str, version_entpl: str) -> tuple[str, str, str]: + # Process input filename to extract purpose and version information. + out_fname = os.path.basename(fname) + # Replace any MDC2020xx pattern with version_reco + pattern = r'(MDC2020[a-z]+)' + if version_entpl: + out_fname = re.sub(pattern, f"MDC2020{version_reco}_{version_entpl}", out_fname) + out_fname = out_fname.replace("mcs.", "nts.") + out_fname = out_fname.replace(".art", ".root") + else: + out_fname = re.sub(pattern, f"MDC2020{version_reco}", out_fname) + out_fname = out_fname.replace("dig.", "mcs.") + + pattern = r"(MDC2020)\w+_(best|perfect)_(v\d+_\d+)" + match = re.search(pattern, fname) + if not match: + raise ValueError(f"Invalid filename format: {fname}") + + purpose = f"{match.group(1)}_{match.group(2)}" + version = f"{match.group(3)}" + return out_fname, purpose, version + +def write_fcl_file(out_fname: str, purpose: str, version: str) -> str: + """Write FCL configuration file""" + if "mcs" in out_fname: + fcl_content = f"""\ + #include "Production/JobConfig/reco/Reco.fcl" + #include "Production/JobConfig/reco/MakeSurfaceSteps.fcl" + services.DbService.purpose: {purpose} + services.DbService.version: {version} + services.DbService.verbose : 2 + outputs.Output.fileName: "{out_fname}" + """ + elif "nts" in out_fname: + desc = out_fname.split('.')[2] + fcl_content = f"""\ + #include "EventNtuple/fcl/from_mcs-primary.fcl" + services.TFileService.fileName: "{out_fname}" + """ + else: + sys.exit(1) + + + # Remove leading indentation + fcl_content = textwrap.dedent(fcl_content) + + + #Create fcl filename + split_name = out_fname.split('.') + split_name[0] = "cnf" # Replace first field + split_name[-1] = "fcl" # Replace last field + fcl_file = '.'.join(split_name) + + #Write to fcl file + with Path(fcl_file).open("w") as f: + f.write(fcl_content) + logging.info("FCL file created successfully") + logging.info(f"FCL content:\n{fcl_content}") + return str(fcl_file) + +def main(): + try: + # Parse command line arguments + args = parse_args() + + # Get input filename + in_fname = os.getenv("fname") + if not in_fname: + raise ValueError("fname environment variable not set") + + logging.info(f"Using output MDC2020 version: {args.version_reco}") + + # Process filename + out_fname, purpose, version = process_filename(in_fname, args.version_reco, args.version_entpl) + logging.info(f"Processing {in_fname} -> {out_fname}") + + # Write fcl configuration + fcl_file = write_fcl_file(out_fname, purpose, version) + + # Run processing + nevents = args.nevents # Number of events to process + run_command(f"loggedMu2e.sh -n {nevents} -s {in_fname} -c {fcl_file}") + + # Handle parent files + in_fname_base = os.path.basename(in_fname) + Path(f"parents_{in_fname_base}").write_text(in_fname_base) + + #Create tarbar filename + split_name = out_fname.split('.') + split_name[0] = "bck" # Replace first field + split_name[-1] = "tbz" # Replace last field + tbz_file = '.'.join(split_name) + + out_content = f"""\ + {args.location} {out_fname} parents_{in_fname_base} + {args.location} {tbz_file} parents_{in_fname_base} + """ + out_content = textwrap.dedent(out_content) + Path("output.txt").write_text(out_content) + + # Push output + if args.dry_run: + logging.info(f"[DRY RUN] Would run: pushOutput output.txt") + else: + run_command("pushOutput output.txt") + + except Exception as e: + logging.error(f"Error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/Scripts/run_mu2ejobfcl.py b/Scripts/run_mu2ejobfcl.py new file mode 100755 index 00000000..23bafaeb --- /dev/null +++ b/Scripts/run_mu2ejobfcl.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +import sys +import subprocess + +def transform_filename(filename: str) -> str: + """ + Split the filename on dots, replace the first segment with 'cnf' + and the last segment with 'tar', then join back together with dots. + """ + parts = filename.split('.') + parts[0] = 'cnf' # Replace first field with 'cnf' + parts[-1] = 'tar' # Replace last field (extension) with 'tar' + parts[-2] = '0' # Replace last field (extension) with 'tar' + return '.'.join(parts) + +def main(): + # 1) Check command-line args + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + + input_file = sys.argv[1] + + # Transform the input file name to par file + transformed_file = transform_filename(input_file) + print(f"Par file: {transformed_file}") + + # Locate the par file via samweb + try: + location_bytes = subprocess.check_output(["samweb", "locate-file", transformed_file]) + except subprocess.CalledProcessError as e: + print(f"Error: samweb locate-file failed for {transformed_file}") + print(e) + sys.exit(1) + + location_str = location_bytes.decode().strip() + + # Strip off 'dcache:' prefix + if location_str.startswith("dcache:"): + location_str = location_str[len("dcache:"):] + + # Form the full path to par file + full_path = f"{location_str}/{transformed_file}" + print(f"Par file located at: {full_path}") + print("++++++++++++++++++++++++++++++++++++++++") + + # Call mu2ejobfcl + try: + subprocess.run([ + "mu2ejobfcl", + "--jobdef", full_path, + "--target", input_file, + "--default-proto", "root", + "--default-loc", "tape" + ], check=True) + except subprocess.CalledProcessError as e: + print("Error: mu2ejobfcl command failed.") + print(e) + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/data/mix_cosmic.json b/data/mix_cosmic.json index a642c6f7..d4d3d058 100644 --- a/data/mix_cosmic.json +++ b/data/mix_cosmic.json @@ -1,6 +1,9 @@ { - "release_v_dts": ["af"], - "primary_name": ["CosmicCORSIKASignalAll", "CosmicCRYSignalAll", "CosmicCORSIKASignalLow", "CosmicCRYSignalLow", "CosmicCORSIKACalibAll", "CosmicCRYCalibAll", "CosmicCORSIKACalibLow", "CosmicCRYCalibLow"], + "release_v_o": ["ap"], + "release_v_dig": ["ap"], + "release_v_rec": ["ap"], + "release_v_dts": ["ae"], + "primary_name": ["CosmicCORSIKASignalAll", "CosmicCRYSignalAll"], "db_purpose": ["perfect", "best"], "digitype": ["Mix1BB", "Mix2BB"] } From 7e01294258712c9ef40308c49f418e20933fb724 Mon Sep 17 00:00:00 2001 From: Iuri Oksuzian Date: Fri, 10 Jan 2025 14:51:33 -0600 Subject: [PATCH 2/4] Addition of GammaConversionResampler to gen_Primary --- Scripts/gen_Primary.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Scripts/gen_Primary.sh b/Scripts/gen_Primary.sh index 28db7ca7..52273cc6 100755 --- a/Scripts/gen_Primary.sh +++ b/Scripts/gen_Primary.sh @@ -141,11 +141,14 @@ else fi dataset=sim.${OWNER}.${TYPE}Stops${CAT}.${STOPS_CAMPAIGN}.art +echo "Input dataset: ${dataset}" if [[ "${TYPE}" == "Muminus" ]] || [[ "${TYPE}" == "Muplus" ]]; then resampler=TargetStopResampler elif [[ "${TYPE}" == "Piminus" ]] || [[ "${TYPE}" == "Piplus" ]]; then resampler=TargetPiStopResampler +elif [[ "${TYPE}" == RMC* ]]; then + resampler=GammaConversionResampler else resampler=${TYPE}StopResampler fi From d3b360262907dcd2a45e80603f404539ce046950 Mon Sep 17 00:00:00 2001 From: Iuri Oksuzian Date: Mon, 20 Jan 2025 10:34:16 -0600 Subject: [PATCH 3/4] Implemented pushOutput in submission scripts --- CampaignConfig/mdc2020_digireco.cfg | 7 +- CampaignConfig/mdc2020_primary.cfg | 8 +- CampaignConfig/mdc2020_prolog.cfg | 114 +++------------------------- ProjPy/gen_Campaigns.py | 2 +- ProjPy/mdc2020_primary.ini | 6 +- Scripts/remove_sam_locations.py | 96 +++++++++++++++++++++++ Scripts/run_JITfcl.py | 73 +++++++++++++++--- 7 files changed, 186 insertions(+), 120 deletions(-) create mode 100644 Scripts/remove_sam_locations.py diff --git a/CampaignConfig/mdc2020_digireco.cfg b/CampaignConfig/mdc2020_digireco.cfg index e74bf68e..616ef3d4 100644 --- a/CampaignConfig/mdc2020_digireco.cfg +++ b/CampaignConfig/mdc2020_digireco.cfg @@ -21,7 +21,11 @@ global.upload_parfile = True submit.f_1 = dropbox:////tmp/%(parfile)s submit.dataset = %(index_dataset)s submit.n_files_per_job = 1 -submit.memory = 4000MB +submit.memory = 6000MB +submit.disk = 40GB +submit.expected-lifetime = 48h +submit.timeout = 47h + executable_2.name = run_JITfcl.py job_setup.multifile = True @@ -97,6 +101,7 @@ global.upload_parfile = True submit.f_1 = dropbox:////tmp/%(parfile)s executable_2.name = run_JITfcl.py + submit.dataset = %(index_dataset)s submit.n_files_per_job = 1 job_setup.multifile = True diff --git a/CampaignConfig/mdc2020_primary.cfg b/CampaignConfig/mdc2020_primary.cfg index 1f7d0bb2..47937f70 100644 --- a/CampaignConfig/mdc2020_primary.cfg +++ b/CampaignConfig/mdc2020_primary.cfg @@ -17,10 +17,16 @@ global.desc = %(primary_name)s global.upload_parfile = True submit.f_1 = dropbox:////tmp/%(parfile)s -executable_2.name = run_JITfcl.py +#submit.f_2=dropbox:///exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/run_JITfcl.py +#job_setup.prescript_14 = chmod +x ${CONDOR_DIR_INPUT}/* +#executable_4.name = \\\\\\\$CONDOR_DIR_INPUT/run_JITfcl.py + +executable_4.name = run_JITfcl.py submit.dataset = %(index_dataset)s submit.n_files_per_job = 1 job_setup.multifile = True +env_pass.MOO_CONFIG = simjob-mdc2020 + diff --git a/CampaignConfig/mdc2020_prolog.cfg b/CampaignConfig/mdc2020_prolog.cfg index 69d31aae..bafa0937 100644 --- a/CampaignConfig/mdc2020_prolog.cfg +++ b/CampaignConfig/mdc2020_prolog.cfg @@ -4,19 +4,8 @@ subgroup = highpro experiment = mu2e wrapper = file:///${FIFE_UTILS_DIR}/libexec/fife_wrap submitter = mu2e -outdir_sim_scratch = /pnfs/mu2e/scratch/datasets/phy-sim/sim/mu2e/ -outdir_dts_scratch = /pnfs/mu2e/scratch/datasets/phy-sim/dts/mu2e/ -logdir_bck = /pnfs/mu2e/persistent/datasets/phy-etc/bck/mu2e/ -outdir_fcl = /pnfs/mu2e/scratch/datasets/phy-etc/cnf/mu2e/ - -outdir_dig_tape = /pnfs/mu2e/tape/phy-sim/dig/mu2e/ -outdir_dts_tape = /pnfs/mu2e/tape/phy-sim/dts/mu2e/ -outdir_nts_tape = /pnfs/mu2e/tape/phy-nts/nts/mu2e/ -outdir_sim_tape = /pnfs/mu2e/tape/phy-sim/sim/mu2e/ -outdir_mcs_tape = /pnfs/mu2e/tape/phy-sim/mcs/mu2e/ -outdir_fcl_tape = /pnfs/mu2e/persistent/datasets/phy-etc/cnf/mu2e/ -outdir_tar = /pnfs/mu2e/persistent/datasets/phy-etc/cnf/mu2e/ -logdir_bck_tape = /pnfs/mu2e/persistent/datasets/phy-etc/bck/mu2e/ +outdir_tar = /pnfs/mu2e/persistent/datasets/phy-etc/cnf/mu2e/ + release = MDC2020 db_version = v1_3 @@ -80,22 +69,21 @@ append_condor_requirements='(TARGET.HAS_SINGULARITY=?=true)' [job_setup] debug = True find_setups = False +ifdh_art = False source_1 = /cvmfs/mu2e.opensciencegrid.org/setupmu2e-art.sh source_2 = ${MUSE_DIR}/bin/muse setup ops source_3 = /cvmfs/mu2e.opensciencegrid.org/bin/OfflineOps/functions.sh source_4 = %(simjob_setup)s - setup = OfflineOps setup_1 = mu2etools -ifdh_art = False -postscript = [ -f template.fcl ] && rm template.fcl -postscript_2 = [[ $(ls *.{art,root} 2>/dev/null) ]] && samweb file-lineage parents `basename ${fname}` > parents.txt -postscript_3 = [[ $(ls *.{art,root} 2>/dev/null) ]] && echo `basename ${fname}` >> parents.txt -postscript_4 = IND=$(echo $fname | awk -F. '{print $5}'); IND=$((10#$IND)); -postscript_5 = TARF=$(ls $CONDOR_DIR_INPUT/cnf*.0.tar 2>/dev/null); -postscript_6 = [[ -n ${TARF} ]] && mu2ejobiodetail --jobdef ${TARF} --index ${IND} --inputs >> parents.txt +#postscript = [ -f template.fcl ] && rm template.fcl +#postscript_2 = [[ $(ls *.{art,root} 2>/dev/null) ]] && samweb file-lineage parents `basename ${fname}` > parents.txt +#postscript_3 = [[ $(ls *.{art,root} 2>/dev/null) ]] && echo `basename ${fname}` >> parents.txt +#postscript_4 = IND=$(echo $fname | awk -F. '{print $5}'); IND=$((10#$IND)); +#postscript_5 = TARF=$(ls $CONDOR_DIR_INPUT/cnf*.0.tar 2>/dev/null); +#postscript_6 = [[ -n ${TARF} ]] && mu2ejobiodetail --jobdef ${TARF} --index ${IND} --inputs >> parents.txt [sam_consumer] limit = 1 @@ -105,83 +93,12 @@ appfamily = art appname = SimJob [prelaunch] -script = [[ "%(upload_parfile)s" == "True" ]] && echo "Uploading parfile: %(parfile)s" && rm -f /tmp/%(parfile)s && cp $(samweb locate-file %(parfile)s | sed 's/^dcache://')/%(parfile)s /tmp/ +script = [[ "%(upload_parfile)s" == "True" ]] && echo "Uploading parfile: %(parfile)s" && rm -f /tmp/%(parfile)s && cp $(samweb locate-file %(parfile)s | sed 's/^dcache://')/%(parfile)s /tmp/ [job_output] declare_metadata = True [job_output_1] -addoutput = bck.*.tbz -declare_metadata = False -metadata_extractor = printJsonSave.sh -add_location = True -add_metadata = file_format=tbz -add_to_dataset = bck.%(submitter)s.%(desc)s.%(dsconf)s.tbz -dest = %(logdir_bck)s/%(desc)s/%(dsconf)s/tbz/ -hash = 2 -hash_alg = sha256 - -[job_output_2] -addoutput = dig.%(submitter)s.%(desc)sTriggered.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = dig.%(submitter)s.%(desc)sTriggered.%(dsconf)s.art -dest = %(outdir_dig_tape)s/%(desc)sTriggered/%(dsconf)s/art - -[job_output_3] -addoutput = dig.%(submitter)s.%(desc)sTriggerable.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = dig.%(submitter)s.%(desc)sTriggerable.%(dsconf)s.art -dest = %(outdir_dig_tape)s/%(desc)sTriggerable/%(dsconf)s/art - -[job_output_4] -addoutput = mcs.%(submitter)s.%(desc)sTriggered.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = mcs.%(submitter)s.%(desc)sTriggered.%(dsconf)s.art -dest = %(outdir_mcs_tape)s/%(desc)sTriggered/%(dsconf)s/art - -[job_output_5] -addoutput = mcs.%(submitter)s.%(desc)s.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = mcs.%(submitter)s.%(desc)s.%(dsconf)s.art -dest = %(outdir_mcs_tape)s/%(desc)s/%(dsconf)s/art - -[job_output_6] -addoutput = dts.%(submitter)s.%(desc)s.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = dts.%(submitter)s.%(desc)s.%(dsconf)s.art -dest = %(outdir_dts_tape)s/%(desc)s/%(dsconf)s/art - -[job_output_7] -addoutput = sim.%(submitter)s.%(desc)s.%(dsconf)s*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = sim.%(submitter)s.%(desc)s.%(dsconf)s.art -dest = %(outdir_sim_tape)s/%(desc)s/%(dsconf)s/art - -[job_output_8] addoutput = cnf*.tar add_to_dataset = cnf.%(submitter)s.%(desc)s.%(dsconf)s.tar declare_metadata = True @@ -192,17 +109,6 @@ hash = 2 hash_alg = sha256 dest = %(outdir_tar)s/%(desc)s/%(dsconf)s/tar/ -[job_output_9] -addoutput = nts.*.root -add_to_dataset = nts.%(submitter)s.%(desc)s.%(dsconf)s.root -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -add_metadata = file_type=mc -hash = 2 -hash_alg = sha256 -dest = %(outdir_nts_tape)s/%(desc)s/%(dsconf)s/root - [stage_recovery] submit.dataset=%(override_dataset)s diff --git a/ProjPy/gen_Campaigns.py b/ProjPy/gen_Campaigns.py index 1d1c268a..bccd39f9 100755 --- a/ProjPy/gen_Campaigns.py +++ b/ProjPy/gen_Campaigns.py @@ -64,7 +64,7 @@ value = list(value.values()) # Drop setup.sh from the campaign/file name - campain_name_list = [item for item in value if 'setup.sh' not in item] + campain_name_list = [item for item in value if isinstance(item, str) and 'setup.sh' not in item] # We use only keys that appear prior to cutoff_key (i.e "primary_name"), and ignore the rest in the campaign/file name if cutoff_key is not None: diff --git a/ProjPy/mdc2020_primary.ini b/ProjPy/mdc2020_primary.ini index 15f9693c..453c5095 100644 --- a/ProjPy/mdc2020_primary.ini +++ b/ProjPy/mdc2020_primary.ini @@ -16,7 +16,7 @@ campaign_keywords={"release_v_o": "override_me", "num_jobs": "override_me", "cat": "override_me", "run_number": "1210", - "simjob_release": "override_me"} + "simjob_setup": "override_me"} software_version=mdc2020 dataset_or_split_data=None @@ -33,12 +33,12 @@ param_overrides = [["-Oglobal.release_v_o=","%(release_v_o)s"], ["-Oglobal.num_jobs=","%(num_jobs)s"], ["-Oglobal.cat=","%(cat)s"], ["-Oglobal.run_number=","%(run_number)s"], - ["-Oglobal.simjob_release=","%(simjob_release)s"]] + ["-Oglobal.simjob_setup=","%(simjob_setup)s"]] test_param_overrides = [] merge_overrides=True login_setup=mu2epro -job_type = mu2epro_jobtype_mdc2020ak +job_type = mu2epro_jobtype_mdc2020ap stage_type=regular output_ancestor_depth=1 diff --git a/Scripts/remove_sam_locations.py b/Scripts/remove_sam_locations.py new file mode 100644 index 00000000..fc5ec55d --- /dev/null +++ b/Scripts/remove_sam_locations.py @@ -0,0 +1,96 @@ +# Examples of usage: +# python /exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/remove_bad_locations.py --file /exp/mu2e/app/users/mu2epro/production_manager/current_datasets/mc/datasets_evntuple_an.txt --dry-run +# or +# python /exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/remove_bad_locations.py --definition nts.mu2e.CosmicCORSIKASignalAllOnSpillTriggered.MDC2020an_v06_01_01_perfect_v1_3.root + +import subprocess +import argparse + +# Function to get file list from a definition +def get_files_from_definition(definition_name): + try: + result = subprocess.run( + ["samweb", "list-definition-files", definition_name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + result.check_returncode() + return result.stdout.strip().split("\n") + except subprocess.CalledProcessError as e: + print(f"Error fetching file list: {e.stderr}") + return [] + +# Function to get file list from a text file containing definitions +def get_files_from_definitions_file(file_path): + files = [] + try: + with open(file_path, "r") as file: + definitions = [line.strip() for line in file if line.strip()] + for definition in definitions: + files.extend(get_files_from_definition(definition)) + except Exception as e: + print(f"Error reading definitions file {file_path}: {e}") + return files + +# Function to get locations for a file +def get_file_locations(file_name): + try: + result = subprocess.run( + ["samweb", "locate-file", file_name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + result.check_returncode() + return [line.strip() for line in result.stdout.strip().split("\n") if line] + except subprocess.CalledProcessError as e: + print(f"Error fetching locations for {file_name}: {e.stderr}") + return [] + +# Function to remove a specific location for a file +def remove_file_location(file_name, location): + try: + result = subprocess.run( + ["samweb", "remove-file-location", file_name, location], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + result.check_returncode() + print(f"Removed location {location} for file {file_name}") + except subprocess.CalledProcessError as e: + print(f"Error removing location {location} for file {file_name}: {e.stderr}") + +# Main script +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Remove file locations containing a specific keyword.") + parser.add_argument("--definition", help="The SAM definition name.") + parser.add_argument("--file", help="Path to a text file containing a list of definitions.") + parser.add_argument("--keyword", default="override_me", help="Keyword to identify locations to remove (default: override_me).") + parser.add_argument("--dry-run", action="store_true", help="If set, only print the actions without executing them.") + args = parser.parse_args() + + override_keyword = args.keyword + + # Get the list of files from the definition or a file containing definitions + if args.definition: + files = get_files_from_definition(args.definition) + elif args.file: + files = get_files_from_definitions_file(args.file) + else: + print("Error: You must provide either a SAM definition name or a file containing a list of definitions.") + exit(1) + + for file_name in files: + # Get locations for the current file + locations = get_file_locations(file_name) + + for location in locations: + if override_keyword in location: + if args.dry_run: + print(f"[Dry Run] Would remove location {location} for file {file_name}") + else: + remove_file_location(file_name, location) + + print("Processing complete.") diff --git a/Scripts/run_JITfcl.py b/Scripts/run_JITfcl.py index 9c18da8d..82cecae2 100755 --- a/Scripts/run_JITfcl.py +++ b/Scripts/run_JITfcl.py @@ -5,6 +5,9 @@ import argparse import subprocess from datetime import datetime +from pathlib import Path +import textwrap +import glob # Function: Exit with error. def exit_abnormal(): @@ -16,22 +19,40 @@ def usage(): print("Usage: script_name.py [--copy_input_mdh --copy_input_ifdh]") print("e.g. run_JITfcl.py --copy_input_mdh") -# Function to run a shell command and return the output +# Function to run a shell command and return the output while streaming def run_command(command): print(f"Running: {command}") - result = subprocess.run(command, shell=True, capture_output=True, text=True) - if result.returncode != 0: + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + output = [] # Collect the command output + for line in process.stdout: + print(line, end="") # Print each line in real-time + output.append(line.strip()) # Collect the output lines + process.wait() # Wait for the command to complete + + if process.returncode != 0: print(f"Error running command: {command}") - print(result.stderr) - print(result.stdout) + for line in process.stderr: + print(line, end="") exit_abnormal() - print(result.stdout) - return result.stdout + + return "\n".join(output) # Return the full output as a string + +# Replace the first and last fields +def replace_file_extensions(input_str, first_field, last_field): + fields = input_str.split('.') + fields[0] = first_field + fields[-1] = last_field + return '.'.join(fields) def main(): parser = argparse.ArgumentParser(description="Process some inputs.") parser.add_argument("--copy_input_mdh", action="store_true", help="Copy input files using mdh") parser.add_argument("--copy_input_ifdh", action="store_true", help="Copy input files using ifhd") + parser.add_argument('--dry-run', action='store_true', help='Print commands without actually running pushOutput') + parser.add_argument('--test-run', action='store_true', help='Run 10 events only') + parser.add_argument('--save-root', action='store_true', help='Save root and art output files') + parser.add_argument('--location', type=str, default='tape', help='Location identifier to include in output.txt (default: "tape")') + args = parser.parse_args() copy_input_mdh = args.copy_input_mdh copy_input_ifdh = args.copy_input_ifdh @@ -64,12 +85,17 @@ def main(): run_command(f"httokendecode -H") run_command(f"LV=$(which voms-proxy-init); echo $LV; ldd $LV; rpm -q -a | egrep 'voms|ssl'; printenv PATH; printenv LD_LIBRARY_PATH") -# run_command(f"voms-proxy-info -all") + # run_command(f"voms-proxy-info -all") + #unset BEARER_TOKEN + print(f"BEARER_TOKEN before unset: {os.environ.get('BEARER_TOKEN')}") + os.environ.pop('BEARER_TOKEN', None) + # Check if the variable is unset + print(f"BEARER_TOKEN after unset: {os.environ.get('BEARER_TOKEN')}") + infiles = run_command(f"mu2ejobiodetail --jobdef {TARF} --index {IND} --inputs") if copy_input_mdh: run_command(f"mu2ejobfcl --jobdef {TARF} --index {IND} --default-proto file --default-loc dir:{os.getcwd()}/indir > {FCL}") - infiles = run_command(f"mu2ejobiodetail --jobdef {TARF} --index {IND} --inputs") print("infiles: %s"%infiles) run_command(f"mdh copy-file -e 3 -o -v -s tape -l local {infiles}") run_command(f"mkdir indir; mv *.art indir/") @@ -86,8 +112,35 @@ def main(): print(f"{datetime.now()} submit_fclless {FCL} content") with open(FCL, 'r') as f: print(f.read()) - run_command(f"loggedMu2e.sh -c {FCL}") + + if args.test_run: + run_command(f"loggedMu2e.sh -n 10 -c {FCL}") + else: + run_command(f"loggedMu2e.sh -c {FCL}") + run_command(f"ls {fname}") + if args.save_root: + out_fnames = glob.glob("*.art") + glob.glob("*.root") + else: + out_fnames = glob.glob("*.art") # Find all .art files + + # Write the list to the file in one line + parents = infiles.split() + [fname] # Add {fname} to the list of files + Path("parents_list.txt").write_text("\n".join(parents) + "\n") + + tbz_file = replace_file_extensions(FCL, "bck", "tbz") + out_content = f"{args.location} {tbz_file} parents_list.txt\n" + for out_fname in out_fnames: + out_content += f"{args.location} {out_fname} parents_list.txt\n" + Path("output.txt").write_text(out_content) + + # Push output + run_command(f"httokendecode -H") + if args.dry_run: + print("[DRY RUN] Would run: pushOutput output.txt") + else: + run_command("pushOutput output.txt") + if __name__ == "__main__": main() From 44f55463c2928d76e19d36e4a5d3408e9045e474 Mon Sep 17 00:00:00 2001 From: Iuri Oksuzian Date: Tue, 21 Jan 2025 10:11:50 -0600 Subject: [PATCH 4/4] Tweaking fcls for pbars --- .../{beam/SimpleAntiprotons.fcl => pbar/PbarSTGunStops.fcl} | 4 ++-- JobConfig/primary/{AntiprotonStop.fcl => PbarResampling.fcl} | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename JobConfig/{beam/SimpleAntiprotons.fcl => pbar/PbarSTGunStops.fcl} (96%) rename JobConfig/primary/{AntiprotonStop.fcl => PbarResampling.fcl} (93%) diff --git a/JobConfig/beam/SimpleAntiprotons.fcl b/JobConfig/pbar/PbarSTGunStops.fcl similarity index 96% rename from JobConfig/beam/SimpleAntiprotons.fcl rename to JobConfig/pbar/PbarSTGunStops.fcl index 78c6e062..10b5b045 100644 --- a/JobConfig/beam/SimpleAntiprotons.fcl +++ b/JobConfig/pbar/PbarSTGunStops.fcl @@ -6,7 +6,7 @@ #include "Production/JobConfig/common/prolog.fcl" #include "Production/JobConfig/beam/prolog.fcl" -process_name : SimpleAntiprotons +process_name : PbarSTGunStops source : { module_type : EmptyEvent @@ -72,7 +72,7 @@ outputs: { "keep *_tgtStopFilter_*_*", "keep *_compressPVTGTStops_*_*" ] - fileName : "sim.owner.stoppedSimpleAntiprotons.version.sequencer.art" + fileName : "sim.owner.PbarSTGunStops.version.sequencer.art" } } diff --git a/JobConfig/primary/AntiprotonStop.fcl b/JobConfig/primary/PbarResampling.fcl similarity index 93% rename from JobConfig/primary/AntiprotonStop.fcl rename to JobConfig/primary/PbarResampling.fcl index 24050e8c..e5c0d926 100644 --- a/JobConfig/primary/AntiprotonStop.fcl +++ b/JobConfig/primary/PbarResampling.fcl @@ -26,4 +26,4 @@ physics.filters.PrimaryFilter.MinimumPartMom : 40. physics.producers.g4run.inputs.simStageOverride : 1 physics.producers.FindMCPrimary.PrimaryProcess : mu2eAntiproton -outputs.PrimaryOutput.fileName: "dts.owner.AntiprotonStop.version.sequencer.art" +outputs.PrimaryOutput.fileName: "dts.owner.PbarResampling.version.sequencer.art"