diff --git a/CampaignConfig/mdc2020_digireco.cfg b/CampaignConfig/mdc2020_digireco.cfg index e74bf68e..616ef3d4 100644 --- a/CampaignConfig/mdc2020_digireco.cfg +++ b/CampaignConfig/mdc2020_digireco.cfg @@ -21,7 +21,11 @@ global.upload_parfile = True submit.f_1 = dropbox:////tmp/%(parfile)s submit.dataset = %(index_dataset)s submit.n_files_per_job = 1 -submit.memory = 4000MB +submit.memory = 6000MB +submit.disk = 40GB +submit.expected-lifetime = 48h +submit.timeout = 47h + executable_2.name = run_JITfcl.py job_setup.multifile = True @@ -97,6 +101,7 @@ global.upload_parfile = True submit.f_1 = dropbox:////tmp/%(parfile)s executable_2.name = run_JITfcl.py + submit.dataset = %(index_dataset)s submit.n_files_per_job = 1 job_setup.multifile = True diff --git a/CampaignConfig/mdc2020_primary.cfg b/CampaignConfig/mdc2020_primary.cfg index 1f7d0bb2..47937f70 100644 --- a/CampaignConfig/mdc2020_primary.cfg +++ b/CampaignConfig/mdc2020_primary.cfg @@ -17,10 +17,16 @@ global.desc = %(primary_name)s global.upload_parfile = True submit.f_1 = dropbox:////tmp/%(parfile)s -executable_2.name = run_JITfcl.py +#submit.f_2=dropbox:///exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/run_JITfcl.py +#job_setup.prescript_14 = chmod +x ${CONDOR_DIR_INPUT}/* +#executable_4.name = \\\\\\\$CONDOR_DIR_INPUT/run_JITfcl.py + +executable_4.name = run_JITfcl.py submit.dataset = %(index_dataset)s submit.n_files_per_job = 1 job_setup.multifile = True +env_pass.MOO_CONFIG = simjob-mdc2020 + diff --git a/CampaignConfig/mdc2020_prolog.cfg b/CampaignConfig/mdc2020_prolog.cfg index 69d31aae..bafa0937 100644 --- a/CampaignConfig/mdc2020_prolog.cfg +++ b/CampaignConfig/mdc2020_prolog.cfg @@ -4,19 +4,8 @@ subgroup = highpro experiment = mu2e wrapper = file:///${FIFE_UTILS_DIR}/libexec/fife_wrap submitter = mu2e -outdir_sim_scratch = /pnfs/mu2e/scratch/datasets/phy-sim/sim/mu2e/ -outdir_dts_scratch = /pnfs/mu2e/scratch/datasets/phy-sim/dts/mu2e/ -logdir_bck = 
/pnfs/mu2e/persistent/datasets/phy-etc/bck/mu2e/ -outdir_fcl = /pnfs/mu2e/scratch/datasets/phy-etc/cnf/mu2e/ - -outdir_dig_tape = /pnfs/mu2e/tape/phy-sim/dig/mu2e/ -outdir_dts_tape = /pnfs/mu2e/tape/phy-sim/dts/mu2e/ -outdir_nts_tape = /pnfs/mu2e/tape/phy-nts/nts/mu2e/ -outdir_sim_tape = /pnfs/mu2e/tape/phy-sim/sim/mu2e/ -outdir_mcs_tape = /pnfs/mu2e/tape/phy-sim/mcs/mu2e/ -outdir_fcl_tape = /pnfs/mu2e/persistent/datasets/phy-etc/cnf/mu2e/ -outdir_tar = /pnfs/mu2e/persistent/datasets/phy-etc/cnf/mu2e/ -logdir_bck_tape = /pnfs/mu2e/persistent/datasets/phy-etc/bck/mu2e/ +outdir_tar = /pnfs/mu2e/persistent/datasets/phy-etc/cnf/mu2e/ + release = MDC2020 db_version = v1_3 @@ -80,22 +69,21 @@ append_condor_requirements='(TARGET.HAS_SINGULARITY=?=true)' [job_setup] debug = True find_setups = False +ifdh_art = False source_1 = /cvmfs/mu2e.opensciencegrid.org/setupmu2e-art.sh source_2 = ${MUSE_DIR}/bin/muse setup ops source_3 = /cvmfs/mu2e.opensciencegrid.org/bin/OfflineOps/functions.sh source_4 = %(simjob_setup)s - setup = OfflineOps setup_1 = mu2etools -ifdh_art = False -postscript = [ -f template.fcl ] && rm template.fcl -postscript_2 = [[ $(ls *.{art,root} 2>/dev/null) ]] && samweb file-lineage parents `basename ${fname}` > parents.txt -postscript_3 = [[ $(ls *.{art,root} 2>/dev/null) ]] && echo `basename ${fname}` >> parents.txt -postscript_4 = IND=$(echo $fname | awk -F. '{print $5}'); IND=$((10#$IND)); -postscript_5 = TARF=$(ls $CONDOR_DIR_INPUT/cnf*.0.tar 2>/dev/null); -postscript_6 = [[ -n ${TARF} ]] && mu2ejobiodetail --jobdef ${TARF} --index ${IND} --inputs >> parents.txt +#postscript = [ -f template.fcl ] && rm template.fcl +#postscript_2 = [[ $(ls *.{art,root} 2>/dev/null) ]] && samweb file-lineage parents `basename ${fname}` > parents.txt +#postscript_3 = [[ $(ls *.{art,root} 2>/dev/null) ]] && echo `basename ${fname}` >> parents.txt +#postscript_4 = IND=$(echo $fname | awk -F. 
'{print $5}'); IND=$((10#$IND)); +#postscript_5 = TARF=$(ls $CONDOR_DIR_INPUT/cnf*.0.tar 2>/dev/null); +#postscript_6 = [[ -n ${TARF} ]] && mu2ejobiodetail --jobdef ${TARF} --index ${IND} --inputs >> parents.txt [sam_consumer] limit = 1 @@ -105,83 +93,12 @@ appfamily = art appname = SimJob [prelaunch] -script = [[ "%(upload_parfile)s" == "True" ]] && echo "Uploading parfile: %(parfile)s" && rm -f /tmp/%(parfile)s && cp $(samweb locate-file %(parfile)s | sed 's/^dcache://')/%(parfile)s /tmp/ +script = [[ "%(upload_parfile)s" == "True" ]] && echo "Uploading parfile: %(parfile)s" && rm -f /tmp/%(parfile)s && cp $(samweb locate-file %(parfile)s | sed 's/^dcache://')/%(parfile)s /tmp/ [job_output] declare_metadata = True [job_output_1] -addoutput = bck.*.tbz -declare_metadata = False -metadata_extractor = printJsonSave.sh -add_location = True -add_metadata = file_format=tbz -add_to_dataset = bck.%(submitter)s.%(desc)s.%(dsconf)s.tbz -dest = %(logdir_bck)s/%(desc)s/%(dsconf)s/tbz/ -hash = 2 -hash_alg = sha256 - -[job_output_2] -addoutput = dig.%(submitter)s.%(desc)sTriggered.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = dig.%(submitter)s.%(desc)sTriggered.%(dsconf)s.art -dest = %(outdir_dig_tape)s/%(desc)sTriggered/%(dsconf)s/art - -[job_output_3] -addoutput = dig.%(submitter)s.%(desc)sTriggerable.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = dig.%(submitter)s.%(desc)sTriggerable.%(dsconf)s.art -dest = %(outdir_dig_tape)s/%(desc)sTriggerable/%(dsconf)s/art - -[job_output_4] -addoutput = mcs.%(submitter)s.%(desc)sTriggered.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = mcs.%(submitter)s.%(desc)sTriggered.%(dsconf)s.art -dest = 
%(outdir_mcs_tape)s/%(desc)sTriggered/%(dsconf)s/art - -[job_output_5] -addoutput = mcs.%(submitter)s.%(desc)s.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = mcs.%(submitter)s.%(desc)s.%(dsconf)s.art -dest = %(outdir_mcs_tape)s/%(desc)s/%(dsconf)s/art - -[job_output_6] -addoutput = dts.%(submitter)s.%(desc)s.%(dsconf)s.*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = dts.%(submitter)s.%(desc)s.%(dsconf)s.art -dest = %(outdir_dts_tape)s/%(desc)s/%(dsconf)s/art - -[job_output_7] -addoutput = sim.%(submitter)s.%(desc)s.%(dsconf)s*.art -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -hash = 2 -hash_alg = sha256 -add_to_dataset = sim.%(submitter)s.%(desc)s.%(dsconf)s.art -dest = %(outdir_sim_tape)s/%(desc)s/%(dsconf)s/art - -[job_output_8] addoutput = cnf*.tar add_to_dataset = cnf.%(submitter)s.%(desc)s.%(dsconf)s.tar declare_metadata = True @@ -192,17 +109,6 @@ hash = 2 hash_alg = sha256 dest = %(outdir_tar)s/%(desc)s/%(dsconf)s/tar/ -[job_output_9] -addoutput = nts.*.root -add_to_dataset = nts.%(submitter)s.%(desc)s.%(dsconf)s.root -declare_metadata = True -metadata_extractor = printJsonSave.sh -add_location = True -add_metadata = file_type=mc -hash = 2 -hash_alg = sha256 -dest = %(outdir_nts_tape)s/%(desc)s/%(dsconf)s/root - [stage_recovery] submit.dataset=%(override_dataset)s diff --git a/JobConfig/beam/SimpleAntiprotons.fcl b/JobConfig/pbar/PbarSTGunStops.fcl similarity index 96% rename from JobConfig/beam/SimpleAntiprotons.fcl rename to JobConfig/pbar/PbarSTGunStops.fcl index 78c6e062..10b5b045 100644 --- a/JobConfig/beam/SimpleAntiprotons.fcl +++ b/JobConfig/pbar/PbarSTGunStops.fcl @@ -6,7 +6,7 @@ #include "Production/JobConfig/common/prolog.fcl" #include "Production/JobConfig/beam/prolog.fcl" -process_name : SimpleAntiprotons 
+process_name : PbarSTGunStops source : { module_type : EmptyEvent @@ -72,7 +72,7 @@ outputs: { "keep *_tgtStopFilter_*_*", "keep *_compressPVTGTStops_*_*" ] - fileName : "sim.owner.stoppedSimpleAntiprotons.version.sequencer.art" + fileName : "sim.owner.PbarSTGunStops.version.sequencer.art" } } diff --git a/JobConfig/primary/AntiprotonStop.fcl b/JobConfig/primary/PbarResampling.fcl similarity index 93% rename from JobConfig/primary/AntiprotonStop.fcl rename to JobConfig/primary/PbarResampling.fcl index 24050e8c..e5c0d926 100644 --- a/JobConfig/primary/AntiprotonStop.fcl +++ b/JobConfig/primary/PbarResampling.fcl @@ -26,4 +26,4 @@ physics.filters.PrimaryFilter.MinimumPartMom : 40. physics.producers.g4run.inputs.simStageOverride : 1 physics.producers.FindMCPrimary.PrimaryProcess : mu2eAntiproton -outputs.PrimaryOutput.fileName: "dts.owner.AntiprotonStop.version.sequencer.art" +outputs.PrimaryOutput.fileName: "dts.owner.PbarResampling.version.sequencer.art" diff --git a/ProjPy/gen_Campaigns.py b/ProjPy/gen_Campaigns.py index 1d1c268a..bccd39f9 100755 --- a/ProjPy/gen_Campaigns.py +++ b/ProjPy/gen_Campaigns.py @@ -64,7 +64,7 @@ value = list(value.values()) # Drop setup.sh from the campaign/file name - campain_name_list = [item for item in value if 'setup.sh' not in item] + campain_name_list = [item for item in value if isinstance(item, str) and 'setup.sh' not in item] # We use only keys that appear prior to cutoff_key (i.e "primary_name"), and ignore the rest in the campaign/file name if cutoff_key is not None: diff --git a/ProjPy/mdc2020_primary.ini b/ProjPy/mdc2020_primary.ini index 15f9693c..453c5095 100644 --- a/ProjPy/mdc2020_primary.ini +++ b/ProjPy/mdc2020_primary.ini @@ -16,7 +16,7 @@ campaign_keywords={"release_v_o": "override_me", "num_jobs": "override_me", "cat": "override_me", "run_number": "1210", - "simjob_release": "override_me"} + "simjob_setup": "override_me"} software_version=mdc2020 dataset_or_split_data=None @@ -33,12 +33,12 @@ param_overrides = 
[["-Oglobal.release_v_o=","%(release_v_o)s"],
 ["-Oglobal.num_jobs=","%(num_jobs)s"],
 ["-Oglobal.cat=","%(cat)s"],
 ["-Oglobal.run_number=","%(run_number)s"],
- ["-Oglobal.simjob_release=","%(simjob_release)s"]]
+ ["-Oglobal.simjob_setup=","%(simjob_setup)s"]]
 test_param_overrides = []
 
 merge_overrides=True
 login_setup=mu2epro
-job_type = mu2epro_jobtype_mdc2020ak
+job_type = mu2epro_jobtype_mdc2020ap
 stage_type=regular
 output_ancestor_depth=1
diff --git a/Scripts/gen_Primary.sh b/Scripts/gen_Primary.sh
index 28db7ca7..52273cc6 100755
--- a/Scripts/gen_Primary.sh
+++ b/Scripts/gen_Primary.sh
@@ -141,11 +141,14 @@ else
 fi
 
 dataset=sim.${OWNER}.${TYPE}Stops${CAT}.${STOPS_CAMPAIGN}.art
+echo "Input dataset: ${dataset}"
 
 if [[ "${TYPE}" == "Muminus" ]] || [[ "${TYPE}" == "Muplus" ]]; then
     resampler=TargetStopResampler
 elif [[ "${TYPE}" == "Piminus" ]] || [[ "${TYPE}" == "Piplus" ]]; then
     resampler=TargetPiStopResampler
+elif [[ "${TYPE}" == RMC* ]]; then
+    resampler=GammaConversionResampler
 else
     resampler=${TYPE}StopResampler
 fi
diff --git a/Scripts/remove_sam_locations.py b/Scripts/remove_sam_locations.py
new file mode 100644
index 00000000..fc5ec55d
--- /dev/null
+++ b/Scripts/remove_sam_locations.py
@@ -0,0 +1,96 @@
+# Examples of usage:
+# python /exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/remove_sam_locations.py --file /exp/mu2e/app/users/mu2epro/production_manager/current_datasets/mc/datasets_evntuple_an.txt --dry-run
+# or
+# python /exp/mu2e/app/users/oksuzian/muse_080224/Production/Scripts/remove_sam_locations.py --definition nts.mu2e.CosmicCORSIKASignalAllOnSpillTriggered.MDC2020an_v06_01_01_perfect_v1_3.root
+
+import subprocess
+import argparse
+
+# Function to get file list from a definition
+def get_files_from_definition(definition_name):
+    try:
+        result = subprocess.run(
+            ["samweb", "list-definition-files", definition_name],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+        )
+        result.check_returncode()
+        return 
result.stdout.strip().split("\n") + except subprocess.CalledProcessError as e: + print(f"Error fetching file list: {e.stderr}") + return [] + +# Function to get file list from a text file containing definitions +def get_files_from_definitions_file(file_path): + files = [] + try: + with open(file_path, "r") as file: + definitions = [line.strip() for line in file if line.strip()] + for definition in definitions: + files.extend(get_files_from_definition(definition)) + except Exception as e: + print(f"Error reading definitions file {file_path}: {e}") + return files + +# Function to get locations for a file +def get_file_locations(file_name): + try: + result = subprocess.run( + ["samweb", "locate-file", file_name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + result.check_returncode() + return [line.strip() for line in result.stdout.strip().split("\n") if line] + except subprocess.CalledProcessError as e: + print(f"Error fetching locations for {file_name}: {e.stderr}") + return [] + +# Function to remove a specific location for a file +def remove_file_location(file_name, location): + try: + result = subprocess.run( + ["samweb", "remove-file-location", file_name, location], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + result.check_returncode() + print(f"Removed location {location} for file {file_name}") + except subprocess.CalledProcessError as e: + print(f"Error removing location {location} for file {file_name}: {e.stderr}") + +# Main script +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Remove file locations containing a specific keyword.") + parser.add_argument("--definition", help="The SAM definition name.") + parser.add_argument("--file", help="Path to a text file containing a list of definitions.") + parser.add_argument("--keyword", default="override_me", help="Keyword to identify locations to remove (default: override_me).") + parser.add_argument("--dry-run", action="store_true", 
help="If set, only print the actions without executing them.") + args = parser.parse_args() + + override_keyword = args.keyword + + # Get the list of files from the definition or a file containing definitions + if args.definition: + files = get_files_from_definition(args.definition) + elif args.file: + files = get_files_from_definitions_file(args.file) + else: + print("Error: You must provide either a SAM definition name or a file containing a list of definitions.") + exit(1) + + for file_name in files: + # Get locations for the current file + locations = get_file_locations(file_name) + + for location in locations: + if override_keyword in location: + if args.dry_run: + print(f"[Dry Run] Would remove location {location} for file {file_name}") + else: + remove_file_location(file_name, location) + + print("Processing complete.") diff --git a/Scripts/run_JITfcl.py b/Scripts/run_JITfcl.py index 9c18da8d..82cecae2 100755 --- a/Scripts/run_JITfcl.py +++ b/Scripts/run_JITfcl.py @@ -5,6 +5,9 @@ import argparse import subprocess from datetime import datetime +from pathlib import Path +import textwrap +import glob # Function: Exit with error. def exit_abnormal(): @@ -16,22 +19,40 @@ def usage(): print("Usage: script_name.py [--copy_input_mdh --copy_input_ifdh]") print("e.g. 
run_JITfcl.py --copy_input_mdh") -# Function to run a shell command and return the output +# Function to run a shell command and return the output while streaming def run_command(command): print(f"Running: {command}") - result = subprocess.run(command, shell=True, capture_output=True, text=True) - if result.returncode != 0: + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + output = [] # Collect the command output + for line in process.stdout: + print(line, end="") # Print each line in real-time + output.append(line.strip()) # Collect the output lines + process.wait() # Wait for the command to complete + + if process.returncode != 0: print(f"Error running command: {command}") - print(result.stderr) - print(result.stdout) + for line in process.stderr: + print(line, end="") exit_abnormal() - print(result.stdout) - return result.stdout + + return "\n".join(output) # Return the full output as a string + +# Replace the first and last fields +def replace_file_extensions(input_str, first_field, last_field): + fields = input_str.split('.') + fields[0] = first_field + fields[-1] = last_field + return '.'.join(fields) def main(): parser = argparse.ArgumentParser(description="Process some inputs.") parser.add_argument("--copy_input_mdh", action="store_true", help="Copy input files using mdh") parser.add_argument("--copy_input_ifdh", action="store_true", help="Copy input files using ifhd") + parser.add_argument('--dry-run', action='store_true', help='Print commands without actually running pushOutput') + parser.add_argument('--test-run', action='store_true', help='Run 10 events only') + parser.add_argument('--save-root', action='store_true', help='Save root and art output files') + parser.add_argument('--location', type=str, default='tape', help='Location identifier to include in output.txt (default: "tape")') + args = parser.parse_args() copy_input_mdh = args.copy_input_mdh copy_input_ifdh = args.copy_input_ifdh @@ 
-64,12 +85,17 @@ def main(): run_command(f"httokendecode -H") run_command(f"LV=$(which voms-proxy-init); echo $LV; ldd $LV; rpm -q -a | egrep 'voms|ssl'; printenv PATH; printenv LD_LIBRARY_PATH") -# run_command(f"voms-proxy-info -all") + # run_command(f"voms-proxy-info -all") + #unset BEARER_TOKEN + print(f"BEARER_TOKEN before unset: {os.environ.get('BEARER_TOKEN')}") + os.environ.pop('BEARER_TOKEN', None) + # Check if the variable is unset + print(f"BEARER_TOKEN after unset: {os.environ.get('BEARER_TOKEN')}") + infiles = run_command(f"mu2ejobiodetail --jobdef {TARF} --index {IND} --inputs") if copy_input_mdh: run_command(f"mu2ejobfcl --jobdef {TARF} --index {IND} --default-proto file --default-loc dir:{os.getcwd()}/indir > {FCL}") - infiles = run_command(f"mu2ejobiodetail --jobdef {TARF} --index {IND} --inputs") print("infiles: %s"%infiles) run_command(f"mdh copy-file -e 3 -o -v -s tape -l local {infiles}") run_command(f"mkdir indir; mv *.art indir/") @@ -86,8 +112,35 @@ def main(): print(f"{datetime.now()} submit_fclless {FCL} content") with open(FCL, 'r') as f: print(f.read()) - run_command(f"loggedMu2e.sh -c {FCL}") + + if args.test_run: + run_command(f"loggedMu2e.sh -n 10 -c {FCL}") + else: + run_command(f"loggedMu2e.sh -c {FCL}") + run_command(f"ls {fname}") + if args.save_root: + out_fnames = glob.glob("*.art") + glob.glob("*.root") + else: + out_fnames = glob.glob("*.art") # Find all .art files + + # Write the list to the file in one line + parents = infiles.split() + [fname] # Add {fname} to the list of files + Path("parents_list.txt").write_text("\n".join(parents) + "\n") + + tbz_file = replace_file_extensions(FCL, "bck", "tbz") + out_content = f"{args.location} {tbz_file} parents_list.txt\n" + for out_fname in out_fnames: + out_content += f"{args.location} {out_fname} parents_list.txt\n" + Path("output.txt").write_text(out_content) + + # Push output + run_command(f"httokendecode -H") + if args.dry_run: + print("[DRY RUN] Would run: pushOutput 
output.txt") + else: + run_command("pushOutput output.txt") + if __name__ == "__main__": main()