From 72cdd357bcb52c469cad6ad5e59ed0858b53b288 Mon Sep 17 00:00:00 2001 From: Raniere Date: Tue, 26 Sep 2023 15:00:29 +0200 Subject: [PATCH] DL0 to DL1 reduction --- magicctapipe/scripts/lst1_magic/LST_runs.txt | 10 + .../scripts/lst1_magic/MAGIC_runs.txt | 30 ++ magicctapipe/scripts/lst1_magic/README.md | 268 ++++++++++- .../scripts/lst1_magic/coincident_events.py | 177 +++++++ .../scripts/lst1_magic/config_general.yaml | 26 + .../lst1_magic_event_coincidence.py | 312 +++++------- .../lst1_magic/lst1_magic_mc_dl0_to_dl1.py | 340 +++++++------ .../lst1_magic/lst1_magic_stereo_reco.py | 81 ++-- .../scripts/lst1_magic/magic_calib_to_dl1.py | 38 +- .../scripts/lst1_magic/merge_hdf_files.py | 9 +- ...ing_runs_and_splitting_training_samples.py | 269 +++++++++++ .../lst1_magic/setting_up_config_and_dir.py | 451 ++++++++++++++++++ .../scripts/lst1_magic/stereo_events.py | 183 +++++++ 13 files changed, 1800 insertions(+), 394 deletions(-) create mode 100644 magicctapipe/scripts/lst1_magic/LST_runs.txt create mode 100644 magicctapipe/scripts/lst1_magic/MAGIC_runs.txt create mode 100644 magicctapipe/scripts/lst1_magic/coincident_events.py create mode 100644 magicctapipe/scripts/lst1_magic/config_general.yaml create mode 100644 magicctapipe/scripts/lst1_magic/merging_runs_and_splitting_training_samples.py create mode 100644 magicctapipe/scripts/lst1_magic/setting_up_config_and_dir.py create mode 100644 magicctapipe/scripts/lst1_magic/stereo_events.py diff --git a/magicctapipe/scripts/lst1_magic/LST_runs.txt b/magicctapipe/scripts/lst1_magic/LST_runs.txt new file mode 100644 index 000000000..53b010ff0 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/LST_runs.txt @@ -0,0 +1,10 @@ +2020_11_18,2923 +2020_11_18,2924 +2020_12_07,3093 +2020_12_07,3094 +2020_12_07,3095 +2020_12_07,3096 +2020_12_15,3265 +2020_12_15,3266 +2020_12_15,3267 +2020_12_15,3268 diff --git a/magicctapipe/scripts/lst1_magic/MAGIC_runs.txt b/magicctapipe/scripts/lst1_magic/MAGIC_runs.txt new file mode 100644 index 000000000..ce49672b0 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/MAGIC_runs.txt @@ -0,0 +1,30 @@ +2020_11_19,5093174 +2020_11_19,5093175 +2020_12_08,5093491 +2020_12_08,5093492 +2020_12_16,5093711 +2020_12_16,5093712 +2020_12_16,5093713 +2020_12_16,5093714 +2021_02_14,5094483 +2021_02_14,5094484 +2021_02_14,5094485 +2021_02_14,5094486 +2021_02_14,5094487 +2021_02_14,5094488 +2021_03_16,5095265 +2021_03_16,5095266 +2021_03_16,5095267 +2021_03_16,5095268 +2021_03_16,5095271 +2021_03_16,5095272 +2021_03_16,5095273 +2021_03_16,5095277 +2021_03_16,5095278 +2021_03_16,5095281 +2021_03_18,5095376 +2021_03_18,5095377 +2021_03_18,5095380 +2021_03_18,5095381 +2021_03_18,5095382 +2021_03_18,5095383 diff --git a/magicctapipe/scripts/lst1_magic/README.md b/magicctapipe/scripts/lst1_magic/README.md index da9c695ca..3fb304f8c 100644 --- a/magicctapipe/scripts/lst1_magic/README.md +++ b/magicctapipe/scripts/lst1_magic/README.md @@ -1,12 +1,12 @@ -# Script for MAGIC and MAGIC+LST-1 analysis +# Script for MAGIC and MAGIC+LST analysis -This folder contains scripts to perform MAGIC-only or MAGIC+LST-1 analysis. +This folder contains scripts to perform MAGIC-only or MAGIC+LST analysis. Each script can be called from the command line from anywhere in your system (some console scripts are created during installation). Please run them with `-h` option for the first time to check what are the options available. ## MAGIC-only analysis -MAGIC-only analysis starts from MAGIC calibrated data (\_Y\_ files). The analysis flow is as following: +MAGIC-only analysis starts from MAGIC-calibrated data (\_Y\_ files). The analysis flow is as follows: - `magic_calib_to_dl1.py` on real and MC data (if you use MCs produced with MMCS), to convert them into DL1 format - if you use SimTelArray MCs, run `lst1_magic_mc_dl0_to_dl1.py` over them to convert them into DL1 format @@ -17,9 +17,9 @@ MAGIC-only analysis starts from MAGIC calibrated data (\_Y\_ files). The analysi - `lst1_magic_create_irf.py` to create the IRF (use `magic_stereo` as `irf_type` in the configuration file) - `lst1_magic_dl2_to_dl3.py` to create DL3 files, and `create_dl3_index_files.py` to create DL3 HDU and index files -## MAGIC+LST-1 analysis +## MAGIC+LST analysis: overview -MAGIC+LST-1 analysis starts from MAGIC calibrated data (\_Y\_ files), LST-1 DL1 data and SimTelArray DL0 data. The analysis flow is as following: +MAGIC+LST analysis starts from MAGIC calibrated data (\_Y\_ files), LST DL1 data and SimTelArray DL0 data. The analysis flow is as following: - `magic_calib_to_dl1.py` on real MAGIC data, to convert them into DL1 format - `lst1_magic_mc_dl0_to_dl1.py` over SimTelArray MCs to convert them into DL1 format @@ -31,6 +31,260 @@ MAGIC+LST-1 analysis starts from MAGIC calibrated data (\_Y\_ files), LST-1 DL1 - `lst1_magic_create_irf.py` to create the IRF - `lst1_magic_dl2_to_dl3.py` to create DL3 files, and `create_dl3_index_files.py` to create DL3 HDU and index files -## High level analysis +## MAGIC+LST analysis: data reduction tutorial (PRELIMINARY) -The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs. Note that the notebooks run with gammapy v0.20 or higher, therefore another conda environment is needed to run them, since the MAGIC+LST-1 pipeline at the moment depends on v0.19. +1) The very first step to reduce MAGIC-LST data is to have remote access/credentials to the IT Container, so provide one. Once you have it, the connection steps are the following: + +Authorized institute server (Client) → ssh connection to CTALaPalma → ssh connection to cp01/02 + +2) Once connected to the IT Container, install MAGIC-CTA-PIPE (e.g. in your home directory in the IT Container) following the tutorial here: https://github.com/ranieremenezes/magic-cta-pipe + +3) Do not forget to open the magic-lst environment with the command `conda activate magic-lst` before starting the analysis + +### DL0 to DL1 + +In this step, we will convert the MAGIC and Monte Carlo (MC) Data Level (DL) 0 to DL1 (our goal is to reach DL3). + +Now copy all the python scripts available here to your preferred directory (e.g. /fefs/aswg/workspace/yourname/yourprojectname) in the IT Container, as well as the files `config_general.yaml`, `MAGIC_runs.txt` and `LST_runs.txt`. + +The file `config_general.yaml` must contain the telescope IDs and the directories with the MC data, as shown below: +``` +mc_tel_ids: + LST-1: 1 + LST-2: 0 + LST-3: 0 + LST-4: 0 + MAGIC-I: 2 + MAGIC-II: 3 + +directories: + workspace_dir : "/fefs/aswg/workspace/yourname/yourprojectname/" + target_name : "CrabTeste" + MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" + MC_electrons : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Electrons/sim_telarray/" + MC_helium : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Helium/sim_telarray/" + MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" + MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + +general: + SimTel_version: "v1.4" + focal_length : "effective" #effective #nominal + MAGIC_runs : "MAGIC_runs.txt" #If there is no MAGIC data, please fill this file with "0, 0" + LST_runs : "LST_runs.txt" + proton_train : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest + +``` + +The file `MAGIC_runs.txt` looks like that: +``` +2020_11_19,5093174 +2020_11_19,5093175 +2020_12_08,5093491 +2020_12_08,5093492 +2020_12_16,5093711 +2020_12_16,5093712 +2020_12_16,5093713 +2020_12_16,5093714 +2021_02_14,5094483 +2021_02_14,5094484 +2021_02_14,5094485 +2021_02_14,5094486 +2021_02_14,5094487 +2021_02_14,5094488 +2021_03_16,5095265 +2021_03_16,5095266 +2021_03_16,5095267 +2021_03_16,5095268 +2021_03_16,5095271 +2021_03_16,5095272 +2021_03_16,5095273 +2021_03_16,5095277 +2021_03_16,5095278 +2021_03_16,5095281 +2021_03_18,5095376 +2021_03_18,5095377 +2021_03_18,5095380 +2021_03_18,5095381 +2021_03_18,5095382 +2021_03_18,5095383 +``` + + +The columns here represent the night and run in which you want to select data. Please do not add blank spaces in the rows, as these names will be used to i) find the MAGIC data in the IT Container and ii) create the subdirectories in your working directory. If there is no MAGIC data, please fill this file with "0,0". Similarly, the `LST_runs.txt` file looks like this: + +``` +2020_11_18,2923 +2020_11_18,2924 +2020_12_07,3093 +2020_12_15,3265 +2020_12_15,3266 +2020_12_15,3267 +2020_12_15,3268 +2021_02_13,3631 +2021_02_13,3633 +2021_02_13,3634 +2021_02_13,3635 +2021_02_13,3636 +2021_03_15,4069 +2021_03_15,4070 +2021_03_15,4071 +2021_03_17,4125 +``` +Note that the LST nights appear as being one day before MAGIC's!!! This is because LST saves the date at the beginning of the night, while MAGIC saves it at the end. If there is no LST data, please fill this file with "0,0". These files are the only ones we need to modify in order to convert DL0 into DL1 data. + +In this analysis, we use a wobble of 0.4°. + +To convert the MAGIC and SimTelArray MCs data into DL1 format, you first do the following: +> $ python setting_up_config_and_dir.py + +``` +***** Linking MC paths - this may take a few minutes ****** +*** Reducing DL0 to DL1 data - this can take many hours *** +Process name: yourprojectnameCrabTeste +To check the jobs submitted to the cluster, type: squeue -n yourprojectnameCrabTeste +``` +Note that this script can be run as +> $ python setting_up_config_and_dir.py --partial-analysis onlyMAGIC + +or + +> $ python setting_up_config_and_dir.py --partial-analysis onlyMC + +if you want to convert only MAGIC or only MC DL0 files to DL1, respectively. + + +The script `setting_up_config_and_dir.py` does a series of things: +- Creates a directory with your source name within the directory `yourprojectname` and several subdirectories inside it that are necessary for the rest of the data reduction. +- Generates a configuration file called config_step1.yaml with and telescope ID information and adopted imaging/cleaning cuts, and puts it in the directory created in the previous step. +- Links the MAGIC and MC data addresses to their respective subdirectories defined in the previous steps. +- Runs the scripts `lst1_magic_mc_dl0_to_dl1.py` and `magic_calib_to_dl1.py` for each one of the linked data files. + +In the file `config_general.yaml`, the sequence of telescopes is always LST1, LST2, LST3, LST4, MAGIC-I, MAGIC-II. So in this tutorial, we have +LST-1 ID = 1 +LST-2 ID = 0 +LST-3 ID = 0 +LST-4 ID = 0 +MAGIC-I ID = 2 +MAGIC-II ID = 3 +If the telescope ID is set to 0, this means that the telescope is not used in the analysis. + +You can check if this process is done by typing +> $ squeue -n yourprojectnameCrabTeste +or +> $ squeue -u your_user_name + +in the terminal. Once it is done, all of the subdirectories in `/fefs/aswg/workspace/yourname/yourprojectname/CrabTeste/DL1/` will be filled with files of the type `dl1_[...]_LST1_MAGIC1_MAGIC2_runXXXXXX.h5` for the MCs and `dl1_MX.RunXXXXXX.0XX.h5` for the MAGIC runs. The next step of the conversion of DL0 to DL1 is to split the DL1 MC proton sample into "train" and "test" datasets (these will be used later in the Random Forest event classification and to do some diagnostic plots) and to merge all the MAGIC data files such that in the end, we have only one datafile per night. To do so, we run the following script: + +> $ python merging_runs_and_splitting_training_samples.py + +``` +***** Splitting protons into 'train' and 'test' datasets... +***** Generating merge bashscripts... +***** Running merge_hdf_files.py in the MAGIC data files... +Process name: merging_CrabTeste +To check the jobs submitted to the cluster, type: squeue -n merging_CrabTeste +``` + +This script will slice the proton MC sample according to the entry "proton_train" in the "config_general.yaml" file, and then it will merge the MAGIC data files in the following order: +- MAGIC subruns are merged into single runs. +- MAGIC I and II runs are merged (only if both telescopes are used, of course). +- All runs in specific nights are merged, such that in the end we have only one datafile per night. +- Proton MC training data is merged. +- Proton MC testing data is merged. +- Diffuse MC gammas are merged. +- MC gammas are merged. + +### Coincident events and stereo parameters on DL1 + +To find coincident events between MAGIC and LST, starting from DL1 data, we run the following script: + +> $ python coincident_events.py + +This script creates the file config_coincidence.yaml containing the telescope IDs and the following parameters: +``` +event_coincidence: + timestamp_type_lst: "dragon_time" # select "dragon_time", "tib_time" or "ucts_time" + window_half_width: "300 ns" + time_offset: + start: "-10 us" + stop: "0 us" +``` + +It then links the real LST data files to the output directory [...]DL1/Observations/Coincident, and runs the script lst1_magic_event_coincidence.py in all of them. + +Once it is done, we add stereo parameters to the MAGIC+LST coincident DL1 data by running: + +> $ python stereo_events.py + +This script creates the file config_stereo.yaml with the follwoing parameters: +``` +stereo_reco: + quality_cuts: "(intensity > 50) & (width > 0)" + theta_uplim: "6 arcmin" +``` + +It then creates the output directories for the DL1 with stereo parameters [...]DL1/Observations/Coincident_stereo/SEVERALNIGHTS and [...]/DL1/MC/GAMMAorPROTON/Merged/StereoMerged, and then runs the script lst1_magic_stereo_reco.py in all of the coincident DL1 files. The stereo DL1 files for MC and real data are then saved in these directories. + +### Random forest + +Once we have the DL1 stereo parameters for all real and MC data, we can train the Random Forest: + +> $ python RF.py + +This script creates the file config_RF.yaml with several parameters related to the energy regressor, disp regressor, and event classifier, and then computes the RF (energy, disp, and classifier) based on the merged-stereo MC diffuse gammas and training proton samples by calling the script lst1_magic_train_rfs.py. The results are saved in [...]/DL1/MC/RFs. + +Once it is done, we can finally convert our DL1 stereo data files into DL2 by running: + +> $ python DL1_to_DL2.py + +This script runs `lst1_magic_dl1_stereo_to_dl2.py` on all DL1 stereo files, which applies the RFs saved in [...]/DL1/MC/RFs to stereo DL1 data (real and test MCs) and produces DL2 real and MC data. The results are saved in [...]/DL2/Observations and [...]/DL2/MC. + +### Instrument response function and DL3 + +Once the previous step is done, we compute the IRF with + +> $ python IRF.py + +which creates the configuration file config_IRF.yaml with several parameters. The main of which are shown below: + +``` +[...] +quality_cuts: "disp_diff_mean < 0.22" +event_type: "software" # select "software", "software_only_3tel", "magic_only" or "hardware" +weight_type_dl2: "simple" # select "simple", "variance" or "intensity" +[...] +gammaness: + cut_type: "dynamic" # select "global" or "dynamic" + [...] + +theta: + cut_type: "global" # select "global" or "dynamic" + global_cut_value: "0.2 deg" # used for the global cut + [...] +``` + +It then runs the script lst1_magic_create_irf.py over the DL2 MC gammas, generating the IRF and saving it at [...]/IRF. + +Optionally, but recommended, we can run the "diagnostic.py" script with: + +> $ python diagnostic.py + +This will create several diagnostic plots (gammaness, effective area, angular resolution, energy resolution, migration matrix, energy bias, and gamma-hadron classification comparisons. All of these plots will be saved in the directory defined on "target_name" in the config_general.yaml file. + +After the IRF, we run the DL2-to-DL3 conversion by doing: + +> $ python DL2_to_DL3.py + +which will save the DL3 files in the directory [...]/DL3. Finally, the last script to run is `create_dl3_index_files.py`. Since it is very fast, we can simply run it directly in the interactive mode by doing (remember that we must be in the magic-lst environment): + +> $ python create_dl3_index_files.py --input-dir ./CrabTeste/DL3 + +That's it. Now you can play with the DL3 data using the high-level notebooks. + +## High-level analysis + +Since the DL3 may have only a few MBs, it is typically convenient to download it to your own machine at this point. It will be necessary to have astropy and gammapy (version > 0.20) installed before proceeding. + +We prepared a [Jupyter Notebook](https://github.com/ranieremenezes/magic-cta-pipe/blob/master/magicctapipe/scripts/lst1_magic/SED_and_LC_from_DL3.ipynb) that quickly creates a counts map, a significance curve, an SED, and a light curve. You can give it a try. + +The folder [Notebooks](https://github.com/cta-observatory/magic-cta-pipe/tree/master/notebooks) contains Jupyter notebooks to perform checks on the IRF, to produce theta2 plots and SEDs. Note that the notebooks run with gammapy v0.20 or higher, while the gammapy version adopted in the MAGIC+LST-1 pipeline is v0.19. diff --git a/magicctapipe/scripts/lst1_magic/coincident_events.py b/magicctapipe/scripts/lst1_magic/coincident_events.py new file mode 100644 index 000000000..2bc4749f5 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/coincident_events.py @@ -0,0 +1,177 @@ +""" +This scripts facilitates the usage of the script +"lst1_magic_event_coincidence.py". This script is +more like a "maneger" that organizes the analysis +process by: +1) Creating the bash scripts for looking for +coincidence events between MAGIC and LST in each +night. +2) Creating the subdirectories for the coincident +event files. + + +Usage: +$ python coincident_events.py + +""" + +import os +import numpy as np +import glob +import yaml +import logging +from pathlib import Path + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + +def configfile_coincidence(ids, target_dir): + + """ + This function creates the configuration file needed for the event coincidence step + + Parameters + ---------- + ids: list + list of telescope IDs + target_dir: str + Path to the working directory + """ + + f = open(target_dir+'/config_coincidence.yaml','w') + f.write("mc_tel_ids:\n LST-1: "+str(ids[0])+"\n LST-2: "+str(ids[1])+"\n LST-3: "+str(ids[2])+"\n LST-4: "+str(ids[3])+"\n MAGIC-I: "+str(ids[4])+"\n MAGIC-II: "+str(ids[5])+"\n\n") + f.write('event_coincidence:\n timestamp_type_lst: "dragon_time" # select "dragon_time", "tib_time" or "ucts_time"\n window_half_width: "300 ns"\n') + f.write(' time_offset:\n start: "-10 us"\n stop: "0 us"\n') + f.close() + + +def linking_lst(target_dir, LST_runs): + + """ + This function links the LST data paths to the working directory. This is a preparation step required for running lst1_magic_event_coincidence.py + + Parameters + ---------- + target_dir: str + Path to the working directory + LST_runs: matrix of strings + This matrix is imported from config_general.yaml and tells the function where to find the LST data and link them to our working directory + """ + + + coincidence_DL1_dir = target_dir+"/DL1/Observations" + if not os.path.exists(coincidence_DL1_dir+"/Coincident"): + os.mkdir(f"{coincidence_DL1_dir}/Coincident") + + for i in LST_runs: + lstObsDir = i[0].split("_")[0]+i[0].split("_")[1]+i[0].split("_")[2] + inputdir = f'/fefs/aswg/data/real/DL1/{lstObsDir}/v0.9/tailcut84' + outputdir = f'{coincidence_DL1_dir}/Coincident/{lstObsDir}' + list_of_subruns = np.sort(glob.glob(f"{inputdir}/dl1*Run*{i[1]}*.*.h5")) + if os.path.exists(f"{outputdir}/list_LST.txt"): + with open(f"{outputdir}/list_LST.txt", "a") as LSTdataPathFile: + for subrun in list_of_subruns: + LSTdataPathFile.write(subrun+"\n") #If this files already exists, simply append the new information + else: + os.mkdir(outputdir) + f = open(f"{outputdir}/list_LST.txt", "w") #If the file list_LST.txt does not exist, it will be created here + for subrun in list_of_subruns: + f.write(subrun+"\n") + f.close() + + +def bash_coincident(target_dir): + + """ + This function generates the bashscript for running the coincidence analysis. + + Parameters + ---------- + target_dir: str + Path to the working directory + """ + + process_name = target_dir.split("/")[-2:][1] + + listOfNightsLST = np.sort(glob.glob(target_dir+"/DL1/Observations/Coincident/*")) + listOfNightsMAGIC = np.sort(glob.glob(target_dir+"/DL1/Observations/Merged/Merged*")) + + for nightMAGIC,nightLST in zip(listOfNightsMAGIC,listOfNightsLST): + process_size = len(np.genfromtxt(nightLST+"/list_LST.txt",dtype="str")) - 1 + + f = open(f"LST_coincident_{nightLST.split('/')[-1]}.sh","w") + f.write("#!/bin/sh\n\n") + f.write("#SBATCH -p short\n") + f.write("#SBATCH -J "+process_name+"_coincidence\n") + f.write(f"#SBATCH --array=0-{process_size}%50\n") + f.write("#SBATCH -N 1\n\n") + f.write("ulimit -l unlimited\n") + f.write("ulimit -s unlimited\n") + f.write("ulimit -a\n\n") + + f.write(f"export INM={nightMAGIC}\n") + f.write(f"export OUTPUTDIR={nightLST}\n") + f.write("SAMPLE_LIST=($(<$OUTPUTDIR/list_LST.txt))\n") + f.write("SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n") + f.write("export LOG=$OUTPUTDIR/coincidence_${SLURM_ARRAY_TASK_ID}.log\n") + f.write(f"conda run -n magic-lst python lst1_magic_event_coincidence.py --input-file-lst $SAMPLE --input-dir-magic $INM --output-dir $OUTPUTDIR --config-file {target_dir}/config_coincidence.yaml >$LOG 2>&1") + f.close() + + + +def main(): + + """ + Here we read the config_general.yaml file and call the functions defined above. + """ + + + with open("config_general.yaml", "rb") as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + + telescope_ids = list(config["mc_tel_ids"].values()) + target_dir = str(Path(config["directories"]["workspace_dir"]))+"/"+config["directories"]["target_name"] + + LST_runs_and_dates = config["general"]["LST_runs"] + LST_runs = np.genfromtxt(LST_runs_and_dates,dtype=str,delimiter=',') + + print("***** Generating file config_coincidence.yaml...") + print("***** This file can be found in ",target_dir) + configfile_coincidence(telescope_ids,target_dir) + + + print("***** Linking the paths to LST data files...") + linking_lst(target_dir, LST_runs) #linking the data paths to current working directory + + + print("***** Generating the bashscript...") + bash_coincident(target_dir) + + + print("***** Submitting processess to the cluster...") + print("Process name: "+target_dir.split("/")[-2:][1]+"_coincidence") + print("To check the jobs submitted to the cluster, type: squeue -n "+target_dir.split("/")[-2:][1]+"_coincidence") + + #Below we run the bash scripts to find the coincident events + list_of_coincidence_scripts = np.sort(glob.glob("LST_coincident*.sh")) + + for n,run in enumerate(list_of_coincidence_scripts): + if n == 0: + launch_jobs = f"coincidence{n}=$(sbatch --parsable {run})" + else: + launch_jobs = launch_jobs + f" && coincidence{n}=$(sbatch --parsable --dependency=afterany:$coincidence{n-1} {run})" + + #print(launch_jobs) + os.system(launch_jobs) + +if __name__ == "__main__": + main() + + + + + + + + diff --git a/magicctapipe/scripts/lst1_magic/config_general.yaml b/magicctapipe/scripts/lst1_magic/config_general.yaml new file mode 100644 index 000000000..dd2cfadde --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/config_general.yaml @@ -0,0 +1,26 @@ +mc_tel_ids: + LST-1: 1 + LST-2: 0 + LST-3: 0 + LST-4: 0 + MAGIC-I: 2 + MAGIC-II: 3 + +directories: + workspace_dir : "/fefs/aswg/workspace/raniere/" + target_name : "CrabTeste" + MC_gammas : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/sim_telarray" + MC_electrons : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Electrons/sim_telarray/" + MC_helium : "/fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/Helium/sim_telarray/" + MC_protons : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/Protons/dec_2276/sim_telarray" + MC_gammadiff : "/fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_2276/sim_telarray/" + +general: + target_RA_deg : 83.633083 #RA in degrees + target_Dec_deg: 22.0145 #Dec in degrees + SimTel_version: "v1.4" + focal_length : "effective" #effective #nominal + MAGIC_runs : "MAGIC_runs.txt" #If there is no MAGIC data, please fill this file with "0, 0" + LST_runs : "LST_runs.txt" + proton_train : 0.8 # 0.8 means that 80% of the DL1 protons will be used for training the Random Forest + diff --git a/magicctapipe/scripts/lst1_magic/lst1_magic_event_coincidence.py b/magicctapipe/scripts/lst1_magic/lst1_magic_event_coincidence.py index 8f3fbf771..2cf258a2a 100644 --- a/magicctapipe/scripts/lst1_magic/lst1_magic_event_coincidence.py +++ b/magicctapipe/scripts/lst1_magic/lst1_magic_event_coincidence.py @@ -2,24 +2,24 @@ # coding: utf-8 """ -This script searches for coincident events from LST-1 and MAGIC joint +This script searches for coincident events from LST and MAGIC joint observation data offline using their timestamps. It applies the -coincidence window to LST-1 events, and checks the coincidence within +coincidence window to LST events, and checks the coincidence within the time offset region specified in the configuration file. Since the optimal time offset changes depending on the telescope distance along the pointing direction, it is recommended to input one subrun file for -LST-1 data, whose observation time is usually around 10 seconds so the +LST data, whose observation time is usually around 10 seconds so the change of the distance is negligible. The MAGIC standard stereo analysis discards the events when one of the telescope images cannot survive the cleaning or fail to compute the DL1 parameters. However, it's possible to perform the stereo analysis if -LST-1 sees these events. Thus, it checks the coincidence for each -telescope combination (i.e., LST1 + M1 and LST1 + M2) and keeps the +LST sees these events. Thus, it checks the coincidence for each +telescope combination (e.g., LST1 + M1 and LST1 + M2) and keeps the MAGIC events even if they do not have their MAGIC-stereo counterparts. -The MAGIC-stereo events, observed during the LST-1 observation time -period but not coincident with any LST-1 events, are also saved in the +The MAGIC-stereo events, observed during the LST observation time +period but not coincident with any LST events, are also saved in the output file, but they are not yet used for the high level analysis. Unless there is any particular reason, please use the default half width @@ -27,25 +27,24 @@ accidental coincidence rate as much as possible by keeping the number of actual coincident events. -Please note that the time offset depends on the date of observations -as summarized below: -* before June 12 2021: -3.1 us -* June 13 2021 to Feb 28 2023: -6.5 us -* March 10 2023 to March 30 2023: -76039.3 us -* April 13 2023 to August 2023: -25.1 us -* after Sep 11 2023 : -6.2 us -By default, pre offset search is performed using large shower events. -The possible time offset is found among all possible combinations of -time offsets using those events. Finally, the time offset scan is performed -around the possible offset found by the pre offset search. Instead of that, -you can also define the offset scan range in the configuration file. - -Usage: +Please note that for the data taken before 12th June 2021, a coincidence +peak should be found around the time offset of -3.1 us, which can be +explained by the trigger time delays of both systems. For the data taken +after that date, however, there is an additional global offset appeared +and then the peak is shifted to the time offset of -6.5 us. Thus, it +would be needed to tune the offset scan region depending on the date +when data were taken. The reason of the shift is under investigation. + +Usage per single LST data file (indicated if you want to do tests): $ python lst1_magic_event_coincidence.py ---input-file-lst dl1/LST-1/dl1_LST-1.Run03265.0040.h5 +--input-file-lst dl1/LST/dl1_LST.Run03265.0040.h5 --input-dir-magic dl1/MAGIC (--output-dir dl1_coincidence) (--config-file config.yaml) + +Broader usage: +This script is called automatically from the script "coincident_events.py". +If you want to analyse a target, this is the way to go. See this other script for more details. """ import argparse @@ -61,14 +60,15 @@ from astropy import units as u from ctapipe.instrument import SubarrayDescription from magicctapipe.io import ( + format_object, get_stereo_events, load_lst_dl1_data_file, load_magic_dl1_data_files, save_pandas_data_in_table, + telescope_combinations, ) -from magicctapipe.io.io import TEL_NAMES -__all__ = ["event_coincidence"] +__all__ = ["telescope_positions","event_coincidence"] logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) @@ -80,44 +80,89 @@ # The final digit of timestamps TIME_ACCURACY = 100 * u.ns -# The telescope positions used in a simulation -TEL_POSITIONS = { - 1: [-8.09, 77.13, 0.78] * u.m, # LST-1 - 2: [39.3, -62.55, -0.97] * u.m, # MAGIC-I - 3: [-31.21, -14.57, 0.2] * u.m, # MAGIC-II -} + +def telescope_positions(config): + """ + This function computes the telescope positions with respect to the array baricenter. + The array can have any configuration, e.g.: M1+M2+LST1+LST2, the full MAGIC+LST array, etc. + + Parameters + ---------- + config: dict + dictionary generated from an yaml file with information about the telescope IDs. Typically evoked from "config_coincidence.yaml" in the main scripts. + + Returns + ------- + TEL_POSITIONS: dict + Dictionary with telescope positions in the baricenter reference frame of the adopted array. + """ + + #Telescope positions in meters in a generic reference frame: + RELATIVE_POSITIONS = { + "LST-1" : [-70.930, -52.070, 43.00], + "LST-2" : [-35.270, 66.140, 32.00], + "LST-3" : [75.280 , 50.490, 28.70], + "LST-4" : [30.910 , -64.540, 32.00], + "MAGIC-I" : [-23.540, -191.750, 41.25], + "MAGIC-II" : [-94.05, -143.770, 42.42] + } + + telescopes_in_use = {} + x = np.asarray([]) + y = np.asarray([]) + z = np.asarray([]) + for k, v in config["mc_tel_ids"].items(): + if v > 0: + telescopes_in_use[v] = RELATIVE_POSITIONS[k] + x = np.append(x,RELATIVE_POSITIONS[k][0]) + y = np.append(y,RELATIVE_POSITIONS[k][1]) + z = np.append(z,RELATIVE_POSITIONS[k][2]) + + average_xyz = np.asarray([np.mean(x), np.mean(y), np.mean(z)]) + + TEL_POSITIONS = {} + for k, v in telescopes_in_use.items(): + TEL_POSITIONS[k] = list(np.round(np.asarray(v)-average_xyz,2)) * u.m + + return TEL_POSITIONS + + def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): """ - Searches for coincident events from LST-1 and MAGIC joint + Searches for coincident events from LST and MAGIC joint observation data offline using their timestamps. Parameters ---------- input_file_lst: str - Path to an input LST-1 DL1 data file + Path to an input LST DL1 data file input_dir_magic: str Path to a directory where input MAGIC DL1 data files are stored output_dir: str Path to a directory where to save an output DL1 data file config: dict - Configuration for the LST-1 + MAGIC combined analysis + Configuration for the LST + MAGIC combined analysis """ config_coinc = config["event_coincidence"] - # Load the input LST-1 DL1 data file - logger.info(f"\nInput LST-1 DL1 data file: {input_file_lst}") + TEL_NAMES, _ = telescope_combinations(config) + + TEL_POSITIONS = telescope_positions(config) + + # Load the input LST DL1 data file + logger.info(f"\nInput LST DL1 data file: {input_file_lst}") event_data_lst, subarray_lst = load_lst_dl1_data_file(input_file_lst) # Load the input MAGIC DL1 data files logger.info(f"\nInput MAGIC directory: {input_dir_magic}") - event_data_magic, subarray_magic = load_magic_dl1_data_files(input_dir_magic) + event_data_magic, subarray_magic = load_magic_dl1_data_files(input_dir_magic, config) - # Exclude the parameters non-common to LST-1 and MAGIC data + # Exclude the parameters non-common to LST and MAGIC data timestamp_type_lst = config_coinc["timestamp_type_lst"] logger.info(f"\nLST timestamp type: {timestamp_type_lst}") @@ -137,21 +182,23 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): window_half_width = u.Quantity(window_half_width).to("ns") window_half_width = u.Quantity(window_half_width.round(), dtype=int) - pre_offset_search = False - if "pre_offset_search" in config_coinc: - pre_offset_search = config_coinc["pre_offset_search"] + logger.info("\nTime offsets:") + logger.info(format_object(config_coinc["time_offset"])) - if pre_offset_search: - logger.info("\nPre offset search will be performed.") - n_pre_offset_search_events = config_coinc["n_pre_offset_search_events"] - else: - logger.info("\noffset scan range defined in the config file will be used.") - offset_start = u.Quantity(config_coinc["time_offset"]["start"]) - offset_stop = u.Quantity(config_coinc["time_offset"]["stop"]) + offset_start = u.Quantity(config_coinc["time_offset"]["start"]) + offset_stop = u.Quantity(config_coinc["time_offset"]["stop"]) + + time_offsets = np.arange( + start=offset_start.to_value("ns").round(), + stop=offset_stop.to_value("ns").round(), + step=TIME_ACCURACY.to_value("ns").round(), + ) + + time_offsets = u.Quantity(time_offsets.round(), unit="ns", dtype=int) event_data = pd.DataFrame() features = pd.DataFrame() - profiles = pd.DataFrame(data={"time_offset": []}) + profiles = pd.DataFrame(data={"time_offset": time_offsets.to_value("us").round(1)}) # Arrange the LST timestamps. They are stored in the UNIX format in # units of seconds with 17 digits, 10 digits for the integral part @@ -171,10 +218,11 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): tel_ids = np.unique(event_data_magic.index.get_level_values("tel_id")) for tel_id in tel_ids: + tel_name = TEL_NAMES[tel_id] df_magic = event_data_magic.query(f"tel_id == {tel_id}").copy() - # Arrange the MAGIC timestamps as same as the LST-1 timestamps + # Arrange the MAGIC timestamps as same as the LST timestamps seconds = np.array([Decimal(str(time)) for time in df_magic["time_sec"]]) nseconds = np.array([Decimal(str(time)) for time in df_magic["time_nanosec"]]) @@ -184,123 +232,8 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): df_magic["timestamp"] = timestamps_magic.to_value("s") df_magic.drop(["time_sec", "time_nanosec"], axis=1, inplace=True) - # Pre offset search is performed to define the offset scan region. - # First, N events are extracted from largest intensity events for LST and - # MAGIC. Then, it counts the number of coincident events within a defined - # window after shifting all possible combinations (N x N) of time offsets. - if pre_offset_search: - logger.info( - "\nPre offset search using large-intensity shower events is ongoing..." - ) - - logger.info( - f"\nExtracting the {tel_name} events taken when LST-1 observed for pre offset search..." - ) - - time_lolim = timestamps_lst[0] - window_half_width - time_uplim = timestamps_lst[-1] + window_half_width - - cond_lolim = timestamps_magic >= time_lolim - cond_uplim = timestamps_magic <= time_uplim - - mask_lst_obs_window = np.logical_and(cond_lolim, cond_uplim) - n_events_magic = np.count_nonzero(mask_lst_obs_window) - - if n_events_magic == 0: - logger.info(f"--> No {tel_name} events are found. Skipping...") - continue - - logger.info(f"--> {n_events_magic} events are found.") - - # Extract indexes of MAGIC large shower events - index_large_intensity_magic = np.argsort( - df_magic["intensity"][mask_lst_obs_window] - )[::-1][:n_pre_offset_search_events] - - # If LST/MAGIC observations are not completely overlapped, only small - # numbers of MAGIC events are left for the pre offset search. - # To find large-intensity showers within the same time window, - # time cut around MAGIC observations is applied to the LST data set. - time_lolim = timestamps_magic[mask_lst_obs_window][0] - window_half_width - time_uplim = timestamps_magic[mask_lst_obs_window][-1] + window_half_width - - cond_lolim = timestamps_lst >= time_lolim - cond_uplim = timestamps_lst <= time_uplim - - mask_magic_obs_window = np.logical_and(cond_lolim, cond_uplim) - - if np.count_nonzero(mask_magic_obs_window) == 0: - logger.info( - f"\nNo LST events are found around {tel_name} events. Skipping..." - ) - continue - - # Extract indexes of LST large shower events - index_large_intensity_lst = np.argsort( - event_data_lst["intensity"][mask_magic_obs_window] - )[::-1][:n_pre_offset_search_events] - - # Crate an array of all combinations of [MAGIC timestamp, LST timestamp] - timestamps_magic_lst_combination = np.array( - np.meshgrid( - timestamps_magic[mask_lst_obs_window][ - index_large_intensity_magic - ].value, - timestamps_lst[mask_magic_obs_window][ - index_large_intensity_lst - ].value, - ) - ).reshape(2, -1) - - # Compute all combinations of time offset between MAGIC and LST - time_offsets_pre_search = ( - timestamps_magic_lst_combination[0] - - timestamps_magic_lst_combination[1] - ) - - time_offsets_pre_search = u.Quantity( - time_offsets_pre_search.round(), unit="ns", dtype=int - ) - - n_coincidences_pre_search = [ - np.sum( - np.abs(time_offsets_pre_search - time_offset).value - < window_half_width.value - ) - for time_offset in time_offsets_pre_search - ] - - n_coincidences_pre_search = np.array(n_coincidences_pre_search) - - offset_at_max_pre_search = time_offsets_pre_search[ - n_coincidences_pre_search == n_coincidences_pre_search.max() - ].mean() - offset_at_max_pre_search = offset_at_max_pre_search.to("us").round(1) - - logger.info( - f"\nPre offset search finds {offset_at_max_pre_search} as a possible offset" - ) - - # offset scan region is defined as 3 x half window width - # around the offset_at_max to cover "full window width" which will - # be used to compute weighted average of the time offset - offset_start = offset_at_max_pre_search - 3 * window_half_width - offset_stop = offset_at_max_pre_search + 3 * window_half_width - - logger.info("\nTime offsets scan region:") - logger.info(f" start: {offset_start.to('us').round(1)}") - logger.info(f" stop: {offset_stop.to('us').round(1)}") - - time_offsets = np.arange( - start=offset_start.to_value("ns").round(), - stop=offset_stop.to_value("ns").round(), - step=TIME_ACCURACY.to_value("ns").round(), - ) - - time_offsets = u.Quantity(time_offsets.round(), unit="ns", dtype=int) - - # Extract the MAGIC events taken when LST-1 observed - logger.info(f"\nExtracting the {tel_name} events taken when LST-1 observed...") + # Extract the MAGIC events taken when LST observed + logger.info(f"\nExtracting the {tel_name} events taken when LST observed...") time_lolim = timestamps_lst[0] + time_offsets[0] - window_half_width time_uplim = timestamps_lst[-1] + time_offsets[-1] + window_half_width @@ -321,7 +254,7 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): timestamps_magic = timestamps_magic[mask] # Start checking the event coincidence. The time offsets and the - # coincidence window are applied to the LST-1 events, and the + # coincidence window are applied to the LST events, and the # MAGIC events existing in the window, including the edges, are # recognized as the coincident events. At first, we scan the # number of coincident events in each time offset and find the @@ -335,6 +268,7 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): logger.info("\nChecking the event coincidence...") for time_offset in time_offsets: + times_lolim = timestamps_lst + time_offset - window_half_width times_uplim = timestamps_lst + time_offset + window_half_width @@ -392,7 +326,7 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): logger.info(f"--> Number of coincident events: {n_events_at_avg}") logger.info(f"--> Fraction over the {tel_name} events: {percentage:.1f}%") - # Keep only the LST-1 events coincident with the MAGIC events, + # Keep only the LST events coincident with the MAGIC events, # and assign the MAGIC observation and event IDs to them indices_lst, indices_magic = np.where(mask) @@ -406,8 +340,8 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): df_lst.reset_index(inplace=True) df_lst.set_index(["obs_id_magic", "event_id_magic", "tel_id"], inplace=True) - # Assign also the LST-1 observation and event IDs to the MAGIC - # events coincident with the LST-1 events + # Assign also the LST observation and event IDs to the MAGIC + # events coincident with the LST events obs_ids_lst = df_lst["obs_id_lst"].to_numpy() event_ids_lst = df_lst["event_id_lst"].to_numpy() @@ -441,8 +375,7 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): event_data = pd.concat([event_data, df_lst, df_magic]) features = pd.concat([features, df_feature]) - profiles = profiles.merge(df_profile, on="time_offset", how="outer") - profiles = profiles.sort_values("time_offset") + profiles = profiles.merge(df_profile) if event_data.empty: logger.info("\nNo coincident events are found. Exiting...") @@ -452,11 +385,11 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): event_data.drop_duplicates(inplace=True) # It sometimes happen that even if it is a MAGIC-stereo event, only - # M1 or M2 event is coincident with a LST-1 event. In that case we + # M1 or M2 event is coincident with a LST event. In that case we # keep both M1 and M2 events, since they are recognized as the same # shower event by the MAGIC-stereo hardware trigger. - # We also keep the MAGIC-stereo events not coincident with any LST-1 + # We also keep the MAGIC-stereo events not coincident with any LST # events, since the stereo reconstruction is still feasible, but not # yet used for the high level analysis. @@ -474,7 +407,7 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): event_data.set_index(["obs_id", "event_id", "tel_id"], inplace=True) event_data.sort_index(inplace=True) - event_data = get_stereo_events(event_data) + event_data = get_stereo_events(event_data, config) event_data.reset_index(inplace=True) event_data = event_data.astype({"obs_id": int, "event_id": int}) @@ -484,7 +417,7 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): input_file_name = Path(input_file_lst).name - output_file_name = input_file_name.replace("LST-1", "LST-1_MAGIC") + output_file_name = input_file_name.replace("LST", "MAGIC_LST") output_file = f"{output_dir}/{output_file_name}" save_pandas_data_in_table( @@ -500,24 +433,27 @@ def event_coincidence(input_file_lst, input_dir_magic, output_dir, config): ) # Create the subarray description with the telescope coordinates - # relative to the center of the LST-1 and MAGIC positions - tel_descriptions = { - 1: subarray_lst.tel[1], # LST-1 - 2: subarray_magic.tel[2], # MAGIC-I - 3: subarray_magic.tel[3], # MAGIC-II - } - - subarray_lst1_magic = SubarrayDescription( - "LST1-MAGIC-Array", TEL_POSITIONS, tel_descriptions + # relative to the center of the LST and MAGIC positions + tel_descriptions = {} + for k, v in TEL_NAMES.items(): + if v[:3] == "LST": + tel_descriptions[k] = subarray_lst.tel[k] + else: + tel_descriptions[k] = subarray_magic.tel[k] + + + subarray_lst_magic = SubarrayDescription( + "LST-MAGIC-Array", TEL_POSITIONS, tel_descriptions ) # Save the subarray description - subarray_lst1_magic.to_hdf(output_file) + subarray_lst_magic.to_hdf(output_file) logger.info(f"\nOutput file: {output_file}") def main(): + start_time = time.time() parser = argparse.ArgumentParser() @@ -528,7 +464,7 @@ def main(): dest="input_file_lst", type=str, required=True, - help="Path to an input LST-1 DL1 data file", + help="Path to an input LST DL1 data file", ) parser.add_argument( @@ -575,4 +511,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/magicctapipe/scripts/lst1_magic/lst1_magic_mc_dl0_to_dl1.py b/magicctapipe/scripts/lst1_magic/lst1_magic_mc_dl0_to_dl1.py index 41cf519ee..2c9b43bfb 100644 --- a/magicctapipe/scripts/lst1_magic/lst1_magic_mc_dl0_to_dl1.py +++ b/magicctapipe/scripts/lst1_magic/lst1_magic_mc_dl0_to_dl1.py @@ -2,32 +2,31 @@ # coding: utf-8 """ -This script processes LST-1 and MAGIC events of simtel MC DL0 data +This script processes LST and MAGIC events of simtel MC DL0 data (*.simtel.gz) and computes the DL1 parameters, i.e., Hillas, timing and -leakage parameters. It saves only the events that all the DL1 parameters +leakage parameters. It saves only the events where all the DL1 parameters are successfully reconstructed. -Since it cannot identify the telescopes of the input file, please assign +Since it cannot identify the telescopes from the input file, please assign the correct telescope ID to each telescope in the configuration file. -When saving data to an output file, the telescope IDs will be reset to -the following ones to match with those of real data: - -LST-1: tel_id = 1, MAGIC-I: tel_id = 2, MAGIC-II: tel_id = 3 - -In addition, the telescope coordinate will be converted to the one -relative to the center of the LST-1 and MAGIC positions (including the +The telescope coordinates will be converted to those +relative to the center of the LST and MAGIC positions (including the altitude) for the convenience of the geometrical stereo reconstruction. -Usage: +Usage per single data file (indicated if you want to do tests): $ python lst1_magic_mc_dl0_to_dl1.py --input-file dl0/gamma_40deg_90deg_run1.simtel.gz (--output-dir dl1) -(--config-file config.yaml) +(--config-file config_step1.yaml) + +Broader usage: +This script is called automatically from the script "setting_up_config_and_dir.py". +If you want to analyse a target, this is the way to go. See this other script for more details. """ -import argparse -import logging +import argparse #Parser for command-line options, arguments etc +import logging #Used to manage the log file import re import time from pathlib import Path @@ -58,17 +57,101 @@ from magicctapipe.utils import calculate_disp, calculate_impact from traitlets.config import Config -__all__ = ["mc_dl0_to_dl1"] +__all__ = ["Calibrate_LST", "Calibrate_MAGIC","mc_dl0_to_dl1"] logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) -# The CORSIKA particle types +# The CORSIKA particle types #CORSIKA simulates Cherenkov light PARTICLE_TYPES = {1: "gamma", 3: "electron", 14: "proton", 402: "helium"} -def mc_dl0_to_dl1(input_file, output_dir, config): + +def Calibrate_LST(event, tel_id, rng, config_lst, camera_geoms, calibrator_lst, increase_nsb, use_time_delta_cleaning, use_dynamic_cleaning ): + + """ + This function computes and returns signal_pixels, image, and peak_time for LST + """ + + calibrator_lst._calibrate_dl0(event, tel_id) + calibrator_lst._calibrate_dl1(event, tel_id) + + image = event.dl1.tel[tel_id].image.astype(np.float64) + peak_time = event.dl1.tel[tel_id].peak_time.astype(np.float64) + + increase_psf = config_lst["increase_psf"]["use"] + use_only_main_island = config_lst["use_only_main_island"] + + if increase_nsb: + # Add extra noise in pixels + image = add_noise_in_pixels(rng, image, **config_lst["increase_nsb"]) + + if increase_psf: + # Smear the image + image = random_psf_smearer( + image=image, + fraction=config_lst["increase_psf"]["fraction"], + indices=camera_geoms[tel_id].neighbor_matrix_sparse.indices, + indptr=camera_geoms[tel_id].neighbor_matrix_sparse.indptr, + ) + + # Apply the image cleaning + signal_pixels = tailcuts_clean( + camera_geoms[tel_id], image, **config_lst["tailcuts_clean"] + ) + + if use_time_delta_cleaning: + signal_pixels = apply_time_delta_cleaning( + geom=camera_geoms[tel_id], + mask=signal_pixels, + arrival_times=peak_time, + **config_lst["time_delta_cleaning"], + ) + + if use_dynamic_cleaning: + signal_pixels = apply_dynamic_cleaning( + image, signal_pixels, **config_lst["dynamic_cleaning"] + ) + + if use_only_main_island: + _, island_labels = number_of_islands(camera_geoms[tel_id], signal_pixels) + n_pixels_on_island = np.bincount(island_labels.astype(np.int64)) + + # The first index means the pixels not surviving + # the cleaning, so should not be considered + n_pixels_on_island[0] = 0 + max_island_label = np.argmax(n_pixels_on_island) + signal_pixels[island_labels != max_island_label] = False + + return signal_pixels, image, peak_time + + +def Calibrate_MAGIC(event, tel_id, config_magic, magic_clean, calibrator_magic): + + """ + This function computes and returns signal_pixels, image, and peak_time for MAGIC + """ + + calibrator_magic._calibrate_dl0(event, tel_id) + calibrator_magic._calibrate_dl1(event, tel_id) + + image = event.dl1.tel[tel_id].image.astype(np.float64) + peak_time = event.dl1.tel[tel_id].peak_time.astype(np.float64) + use_charge_correction = config_magic["charge_correction"]["use"] + + if use_charge_correction: + # Scale the charges by the correction factor + image *= config_magic["charge_correction"]["factor"] + + # Apply the image cleaning + signal_pixels, image, peak_time = magic_clean[tel_id].clean_image( + event_image=image, event_pulse_time=peak_time + ) + return signal_pixels, image, peak_time + + +def mc_dl0_to_dl1(input_file, output_dir, config, focal_length): """ Processes LST-1 and MAGIC events of simtel MC DL0 data and computes the DL1 parameters. @@ -83,28 +166,24 @@ def mc_dl0_to_dl1(input_file, output_dir, config): Configuration for the LST-1 + MAGIC analysis """ - assigned_tel_ids = config["mc_tel_ids"] + assigned_tel_ids = config["mc_tel_ids"] #This variable becomes the dictionary {'LST-1': 1, 'MAGIC-I': 2, 'MAGIC-II': 3} - logger.info("\nAssigned telescope IDs:") + logger.info("\nAssigned telescope IDs:") #Here we are just adding infos to the log file logger.info(format_object(assigned_tel_ids)) - tel_id_lst1 = assigned_tel_ids["LST-1"] - tel_id_m1 = assigned_tel_ids["MAGIC-I"] - tel_id_m2 = assigned_tel_ids["MAGIC-II"] - # Load the input file logger.info(f"\nInput file: {input_file}") event_source = EventSource( input_file, - allowed_tels=list(assigned_tel_ids.values()), - focal_length_choice="effective", + allowed_tels=list(filter(lambda check_id: check_id > 0, assigned_tel_ids.values())), #Here we load the events for all telescopes with ID > 0. + focal_length_choice=focal_length, ) obs_id = event_source.obs_ids[0] - subarray = event_source.subarray + subarray = event_source.subarray - tel_descriptions = subarray.tel + tel_descriptions = subarray.tel tel_positions = subarray.positions logger.info("\nSubarray description:") @@ -136,7 +215,7 @@ def mc_dl0_to_dl1(input_file, output_dir, config): logger.info(format_object(config_lst["increase_psf"])) increase_nsb = config_lst["increase_nsb"].pop("use") - increase_psf = config_lst["increase_psf"].pop("use") + increase_psf = config_lst["increase_psf"]["use"] if increase_nsb: rng = np.random.default_rng(obs_id) @@ -177,8 +256,6 @@ def mc_dl0_to_dl1(input_file, output_dir, config): logger.info("\nMAGIC charge correction:") logger.info(format_object(config_magic["charge_correction"])) - use_charge_correction = config_magic["charge_correction"].pop("use") - if config_magic["magic_clean"]["find_hotpixels"]: logger.warning( "\nWARNING: Hot pixels do not exist in a simulation. " @@ -189,11 +266,6 @@ def mc_dl0_to_dl1(input_file, output_dir, config): logger.info("\nMAGIC image cleaning:") logger.info(format_object(config_magic["magic_clean"])) - magic_clean = { - tel_id_m1: MAGICClean(camera_geoms[tel_id_m1], config_magic["magic_clean"]), - tel_id_m2: MAGICClean(camera_geoms[tel_id_m2], config_magic["magic_clean"]), - } - # Prepare for saving data to an output file Path(output_dir).mkdir(exist_ok=True, parents=True) @@ -207,12 +279,34 @@ def mc_dl0_to_dl1(input_file, output_dir, config): zenith = 90 - sim_config["max_alt"].to_value("deg") azimuth = Angle(sim_config["max_az"]).wrap_at("360 deg").degree - + logger.info(np.asarray(list(assigned_tel_ids.values()))) + LSTs_IDs = np.asarray(list(assigned_tel_ids.values())[0:4]) + LSTs_in_use = np.where(LSTs_IDs > 0)[0] + 1 #Here we select which LSTs are/is in use + + if len(LSTs_in_use) == 0: + LSTs_in_use = ''.join(str(k) for k in LSTs_in_use) + elif len(LSTs_in_use) > 0: + LSTs_in_use = 'LST'+'_LST'.join(str(k) for k in LSTs_in_use) + print('lst',LSTs_in_use) + MAGICs_IDs = np.asarray(list(assigned_tel_ids.values())[4:6]) + MAGICs_in_use = np.where(MAGICs_IDs > 0)[0] + 1 #Here we select which MAGICs are/is in use + + if len(MAGICs_in_use) == 0: + MAGICs_in_use = ''.join(str(k) for k in MAGICs_in_use) + elif len(MAGICs_in_use) > 0: + MAGICs_in_use = 'MAGIC'+'_MAGIC'.join(str(k) for k in MAGICs_in_use) + magic_clean = {} + for k in MAGICs_IDs: + if k > 0: + magic_clean[k] = MAGICClean(camera_geoms[k], config_magic["magic_clean"]) + output_file = ( f"{output_dir}/dl1_{particle_type}_zd_{zenith.round(3)}deg_" - f"az_{azimuth.round(3)}deg_LST-1_MAGIC_run{obs_id}.h5" - ) + f"az_{azimuth.round(3)}deg_{LSTs_in_use}_{MAGICs_in_use}_run{obs_id}.h5" + ) #The files are saved with the names of all telescopes involved + + # Loop over every shower event logger.info("\nProcessing the events...") @@ -224,83 +318,24 @@ def mc_dl0_to_dl1(input_file, output_dir, config): tels_with_trigger = event.trigger.tels_with_trigger # Check if the event triggers both M1 and M2 or not - trigger_m1 = tel_id_m1 in tels_with_trigger - trigger_m2 = tel_id_m2 in tels_with_trigger + if((set(MAGICs_IDs).issubset(set(tels_with_trigger))) and (MAGICs_in_use=="MAGIC1_MAGIC2")): + magic_stereo = True #If both have trigger, then magic_stereo = True + else: + magic_stereo = False - magic_stereo = trigger_m1 and trigger_m2 + for tel_id in tels_with_trigger: - for tel_id in tels_with_trigger: - if tel_id == tel_id_lst1: + if tel_id in LSTs_IDs: ##If the ID is in the LST list, we call Calibrate_LST() # Calibrate the LST-1 event - calibrator_lst._calibrate_dl0(event, tel_id) - calibrator_lst._calibrate_dl1(event, tel_id) - - image = event.dl1.tel[tel_id].image.astype(np.float64) - peak_time = event.dl1.tel[tel_id].peak_time.astype(np.float64) - - if increase_nsb: - # Add extra noise in pixels - image = add_noise_in_pixels( - rng, image, **config_lst["increase_nsb"] - ) - - if increase_psf: - # Smear the image - image = random_psf_smearer( - image=image, - fraction=config_lst["increase_psf"]["fraction"], - indices=camera_geoms[tel_id].neighbor_matrix_sparse.indices, - indptr=camera_geoms[tel_id].neighbor_matrix_sparse.indptr, - ) - - # Apply the image cleaning - signal_pixels = tailcuts_clean( - camera_geoms[tel_id], image, **config_lst["tailcuts_clean"] - ) - - if use_time_delta_cleaning: - signal_pixels = apply_time_delta_cleaning( - geom=camera_geoms[tel_id], - mask=signal_pixels, - arrival_times=peak_time, - **config_lst["time_delta_cleaning"], - ) - - if use_dynamic_cleaning: - signal_pixels = apply_dynamic_cleaning( - image, signal_pixels, **config_lst["dynamic_cleaning"] - ) - - if use_only_main_island: - _, island_labels = number_of_islands( - camera_geoms[tel_id], signal_pixels - ) - n_pixels_on_island = np.bincount(island_labels.astype(np.int64)) - - # The first index means the pixels not surviving - # the cleaning, so should not be considered - n_pixels_on_island[0] = 0 - max_island_label = np.argmax(n_pixels_on_island) - signal_pixels[island_labels != max_island_label] = False - - else: + signal_pixels, image, peak_time = Calibrate_LST(event, tel_id, rng, config_lst, camera_geoms, calibrator_lst, increase_nsb, use_time_delta_cleaning, use_dynamic_cleaning) + elif tel_id in MAGICs_IDs: # Calibrate the MAGIC event - calibrator_magic._calibrate_dl0(event, tel_id) - calibrator_magic._calibrate_dl1(event, tel_id) - - image = event.dl1.tel[tel_id].image.astype(np.float64) - peak_time = event.dl1.tel[tel_id].peak_time.astype(np.float64) - - if use_charge_correction: - # Scale the charges by the correction factor - image *= config_magic["charge_correction"]["factor"] - - # Apply the image cleaning - signal_pixels, image, peak_time = magic_clean[tel_id].clean_image( - event_image=image, event_pulse_time=peak_time + signal_pixels, image, peak_time = Calibrate_MAGIC(event, tel_id, config_magic, magic_clean, calibrator_magic) + else: + logger.info( + f"--> Telescope ID {tel_id} not in LST list or MAGIC list. Please check if the IDs are OK in the configuration file" ) - - if not any(signal_pixels): + if not any(signal_pixels): #So: if there is no event, we skip it and go back to the loop in the next event logger.info( f"--> {event.count} event (event ID: {event.index.event_id}, " f"telescope {tel_id}) could not survive the image cleaning. " @@ -326,6 +361,15 @@ def mc_dl0_to_dl1(input_file, output_dir, config): # Parametrize the image hillas_params = hillas_parameters(camera_geom_masked, image_masked) + # + if any(np.isnan(value) for value in hillas_params.values()): + logger.info( + f"--> {event.count} event (event ID: {event.index.event_id}, " + f"telescope {tel_id}): non-valid Hillas parameters. Skipping..." + ) + continue + + timing_params = timing_parameters( camera_geom_masked, image_masked, peak_time_masked, hillas_params ) @@ -388,17 +432,10 @@ def mc_dl0_to_dl1(input_file, output_dir, config): n_islands=n_islands, magic_stereo=magic_stereo, ) - + # Reset the telescope IDs - if tel_id == tel_id_lst1: - event_info.tel_id = 1 - - elif tel_id == tel_id_m1: - event_info.tel_id = 2 - - elif tel_id == tel_id_m2: - event_info.tel_id = 3 - + event_info.tel_id = tel_id + # Save the parameters to the output file writer.write( "parameters", @@ -409,28 +446,29 @@ def mc_dl0_to_dl1(input_file, output_dir, config): logger.info(f"\nIn total {n_events_processed} events are processed.") # Convert the telescope coordinate to the one relative to the center - # of the LST-1 and MAGIC positions, and reset the telescope IDs + # of the LST and MAGIC positions, and reset the telescope IDs position_mean = u.Quantity(list(tel_positions.values())).mean(axis=0) - tel_positions_lst1_magic = { - 1: tel_positions[tel_id_lst1] - position_mean, # LST-1 - 2: tel_positions[tel_id_m1] - position_mean, # MAGIC-I - 3: tel_positions[tel_id_m2] - position_mean, # MAGIC-II - } - - tel_descriptions_lst1_magic = { - 1: tel_descriptions[tel_id_lst1], # LST-1 - 2: tel_descriptions[tel_id_m1], # MAGIC-I - 3: tel_descriptions[tel_id_m2], # MAGIC-II - } - - subarray_lst1_magic = SubarrayDescription( - "LST1-MAGIC-Array", tel_positions_lst1_magic, tel_descriptions_lst1_magic + tel_positions_lst_magic = {} + tel_descriptions_lst_magic = {} + IDs_in_use = np.asarray(list(assigned_tel_ids.values())) + IDs_in_use = IDs_in_use[IDs_in_use > 0] + for k in IDs_in_use: + tel_positions_lst_magic[k] = tel_positions[k] - position_mean + tel_descriptions_lst_magic[k] = tel_descriptions[k] + + + subarray_lst_magic = SubarrayDescription( + "LST-MAGIC-Array", tel_positions_lst_magic, tel_descriptions_lst_magic ) + tel_positions = subarray_lst_magic.positions + logger.info("\nTelescope positions:") + logger.info(format_object(tel_positions)) + # Save the subarray description - subarray_lst1_magic.to_hdf(output_file) - + subarray_lst_magic.to_hdf(output_file) + # Save the simulation configuration with HDF5TableWriter(output_file, group_name="simulation", mode="a") as writer: writer.write("config", sim_config) @@ -439,10 +477,15 @@ def mc_dl0_to_dl1(input_file, output_dir, config): def main(): + + """ Here we collect the input parameters from the command line, load the configuration file and run mc_dl0_to_dl1()""" + start_time = time.time() parser = argparse.ArgumentParser() - + + + #Here we are simply collecting the parameters from the command line, as input file, output directory, and configuration file parser.add_argument( "--input-file", "-i", @@ -466,17 +509,26 @@ def main(): "-c", dest="config_file", type=str, - default="./config.yaml", + default="./config_step1.yaml", help="Path to a configuration file", ) + + parser.add_argument( + "--focal_length_choice", + "-f", + dest="focal_length_choice", + type=str, + default="effective", + help='Standard is "effective"', + ) - args = parser.parse_args() + args = parser.parse_args() #Here we select all 3 parameters collected above - with open(args.config_file, "rb") as f: - config = yaml.safe_load(f) + with open(args.config_file, "rb") as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) #Here we collect the inputs from the configuration file # Process the input data - mc_dl0_to_dl1(args.input_file, args.output_dir, config) + mc_dl0_to_dl1(args.input_file, args.output_dir, config, args.focal_length_choice) logger.info("\nDone.") @@ -485,4 +537,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/magicctapipe/scripts/lst1_magic/lst1_magic_stereo_reco.py b/magicctapipe/scripts/lst1_magic/lst1_magic_stereo_reco.py index accec1cea..cdf367a31 100644 --- a/magicctapipe/scripts/lst1_magic/lst1_magic_stereo_reco.py +++ b/magicctapipe/scripts/lst1_magic/lst1_magic_stereo_reco.py @@ -7,7 +7,7 @@ specified in the configuration file are applied to the events before the reconstruction. -When the input is real data containing LST-1 and MAGIC events, it checks +When the input is real data containing LST and MAGIC events, it checks the angular distances of their pointing directions and excludes the events taken with larger distances than the limit specified in the configuration file. This is in principle to avoid the reconstruction of @@ -24,6 +24,10 @@ (--output-dir dl1_stereo) (--config-file config.yaml) (--magic-only) + +Broader usage: +This script is called automatically from the script "stereo_events.py". +If you want to analyse a target, this is the way to go. See this other script for more details. """ import argparse @@ -54,7 +58,7 @@ logger.setLevel(logging.INFO) -def calculate_pointing_separation(event_data): +def calculate_pointing_separation(event_data, config): """ Calculates the angular distance of the LST-1 and MAGIC pointing directions. @@ -63,30 +67,36 @@ def calculate_pointing_separation(event_data): ---------- event_data: pandas.core.frame.DataFrame Data frame of LST-1 and MAGIC events - + config: dict + Configuration for the LST-1 + MAGIC analysis Returns ------- theta: pandas.core.series.Series - Angular distance of the LST-1 and MAGIC pointing directions - in the unit of degree + Angular distance of the LST array and MAGIC pointing directions + in units of degree """ - - # Extract LST-1 events - df_lst = event_data.query("tel_id == 1") - - # Extract the MAGIC events seen by also LST-1 - df_magic = event_data.query("tel_id == [2, 3]") + + assigned_tel_ids = config["mc_tel_ids"] #This variable becomes a dictionary, e.g.: {'LST-1': 1, 'LST-2': 0, 'LST-3': 0, 'LST-4': 0, 'MAGIC-I': 2, 'MAGIC-II': 3} + LSTs_IDs = np.asarray(list(assigned_tel_ids.values())[0:4]) + LSTs_IDs = list(LSTs_IDs[LSTs_IDs > 0]) #Here we list only the LSTs in use + MAGICs_IDs = np.asarray(list(assigned_tel_ids.values())[4:6]) + MAGICs_IDs = list(MAGICs_IDs[MAGICs_IDs > 0]) #Here we list only the MAGICs in use + + # Extract LST events + df_lst = event_data.query(f"tel_id == {LSTs_IDs}") + + # Extract the MAGIC events seen by also LST + df_magic = event_data.query(f"tel_id == {MAGICs_IDs}") df_magic = df_magic.loc[df_lst.index] - # Calculate the mean of the M1 and M2 pointing directions - pnt_az_magic, pnt_alt_magic = calculate_mean_direction( - lon=df_magic["pointing_az"], lat=df_magic["pointing_alt"], unit="rad" - ) + # Calculate the mean of the LSTs, and also of the M1 and M2 pointing directions + pnt_az_LST, pnt_alt_LST = calculate_mean_direction(lon=df_lst["pointing_az"], lat=df_lst["pointing_alt"], unit="rad") + pnt_az_magic, pnt_alt_magic = calculate_mean_direction(lon=df_magic["pointing_az"], lat=df_magic["pointing_alt"], unit="rad") # Calculate the angular distance of their pointing directions theta = angular_separation( - lon1=u.Quantity(df_lst["pointing_az"], unit="rad"), - lat1=u.Quantity(df_lst["pointing_alt"], unit="rad"), + lon1=u.Quantity(pnt_az_LST, unit="rad"), + lat1=u.Quantity(pnt_alt_LST, unit="rad"), lon2=u.Quantity(pnt_az_magic, unit="rad"), lat2=u.Quantity(pnt_alt_magic, unit="rad"), ) @@ -108,14 +118,15 @@ def stereo_reconstruction(input_file, output_dir, config, magic_only_analysis=Fa output_dir: str Path to a directory where to save an output DL1-stereo data file config: dict - Configuration for the LST-1 + MAGIC analysis + Configuration file for the stereo LST + MAGIC analysis, i.e. config_stereo.yaml magic_only_analysis: bool If `True`, it reconstructs the stereo parameters using only MAGIC events """ config_stereo = config["stereo_reco"] - + assigned_tel_ids = config["mc_tel_ids"] #This variable becomes a dictionary, e.g.: {'LST-1': 1, 'LST-2': 0, 'LST-3': 0, 'LST-4': 0, 'MAGIC-I': 2, 'MAGIC-II': 3} + # Load the input file logger.info(f"\nInput file: {input_file}") @@ -142,25 +153,36 @@ def stereo_reconstruction(input_file, output_dir, config, magic_only_analysis=Fa # Apply the event cuts logger.info(f"\nMAGIC-only analysis: {magic_only_analysis}") + LSTs_IDs = np.asarray(list(assigned_tel_ids.values())[0:4]) + if magic_only_analysis: - event_data.query("tel_id > 1", inplace=True) + event_data.query(f"tel_id > {LSTs_IDs.max()}", inplace=True) # Here we select only the events with the MAGIC tel_ids, i.e. above the maximum tel_id of the LSTs logger.info(f"\nQuality cuts: {config_stereo['quality_cuts']}") - event_data = get_stereo_events(event_data, config_stereo["quality_cuts"]) + event_data = get_stereo_events(event_data, config=config, quality_cuts=config_stereo["quality_cuts"]) - # Check the angular distance of the LST-1 and MAGIC pointing directions + # Check the angular distance of the LST and MAGIC pointing directions tel_ids = np.unique(event_data.index.get_level_values("tel_id")).tolist() - if (not is_simulation) and (tel_ids != [2, 3]): + Number_of_LSTs_in_use = len(LSTs_IDs[LSTs_IDs > 0]) + MAGICs_IDs = np.asarray(list(assigned_tel_ids.values())[4:6]) + Number_of_MAGICs_in_use = len(MAGICs_IDs[MAGICs_IDs > 0]) + if (Number_of_LSTs_in_use > 0) and (Number_of_MAGICs_in_use > 0): #If we use the two arrays, i.e. MAGIC and LST, then the "if" statement below will work (except for MC simulations) + Two_arrays_are_used = True + else: + Two_arrays_are_used = False + + if (not is_simulation) and (Two_arrays_are_used): + logger.info( "\nChecking the angular distances of " - "the LST-1 and MAGIC pointing directions..." + "the LST and MAGIC pointing directions..." ) event_data.reset_index(level="tel_id", inplace=True) # Calculate the angular distance - theta = calculate_pointing_separation(event_data) + theta = calculate_pointing_separation(event_data, config) theta_uplim = u.Quantity(config_stereo["theta_uplim"]) mask = u.Quantity(theta, unit="deg") < theta_uplim @@ -201,6 +223,7 @@ def stereo_reconstruction(input_file, output_dir, config, magic_only_analysis=Fa multi_indices = event_data.groupby(["obs_id", "event_id"]).size().index for i_evt, (obs_id, event_id) in enumerate(multi_indices): + if i_evt % 100 == 0: logger.info(f"{i_evt} events") @@ -218,6 +241,7 @@ def stereo_reconstruction(input_file, output_dir, config, magic_only_analysis=Fa tel_ids = df_evt.index.get_level_values("tel_id") for tel_id in tel_ids: + df_tel = df_evt.loc[tel_id] # Assign the telescope information @@ -321,6 +345,7 @@ def stereo_reconstruction(input_file, output_dir, config, magic_only_analysis=Fa def main(): + start_time = time.time() parser = argparse.ArgumentParser() @@ -348,7 +373,7 @@ def main(): "-c", dest="config_file", type=str, - default="./config.yaml", + default="./config_general.yaml", help="Path to a configuration file", ) @@ -363,7 +388,7 @@ def main(): with open(args.config_file, "rb") as f: config = yaml.safe_load(f) - + # Process the input data stereo_reconstruction(args.input_file, args.output_dir, config, args.magic_only) @@ -374,4 +399,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/magicctapipe/scripts/lst1_magic/magic_calib_to_dl1.py b/magicctapipe/scripts/lst1_magic/magic_calib_to_dl1.py index 645c10053..46b1067da 100644 --- a/magicctapipe/scripts/lst1_magic/magic_calib_to_dl1.py +++ b/magicctapipe/scripts/lst1_magic/magic_calib_to_dl1.py @@ -7,11 +7,6 @@ Hillas, timing and leakage parameters. It saves only the events that all the DL1 parameters are successfully reconstructed. -When saving data to an output file, the telescope IDs will be reset to -the following ones for the convenience of the combined analysis with -LST-1, whose telescope ID is 1: - -MAGIC-I: tel_id = 2, MAGIC-II: tel_id = 3 When the input is real data, it searches for all the subrun files with the same observation ID and stored in the same directory as the input @@ -27,12 +22,16 @@ this script, but since the MaTaJu cleaning is not yet implemented in this pipeline, it applies the standard cleaning instead. -Usage: +Usage per single data file (indicated if you want to do tests): $ python magic_calib_to_dl1.py --input-file calib/20201216_M1_05093711.001_Y_CrabNebula-W0.40+035.root (--output-dir dl1) (--config-file config.yaml) (--process-run) + +Broader usage: +This script is called automatically from the script "setting_up_config_and_dir.py". +If you want to analyse a target, this is the way to go. See this other script for more details. """ import argparse @@ -72,7 +71,7 @@ PEDESTAL_TYPES = ["fundamental", "from_extractor", "from_extractor_rndm"] -def magic_calib_to_dl1(input_file, output_dir, config, max_events, process_run=False): +def magic_calib_to_dl1(input_file, output_dir, config, process_run=False): """ Processes the events of MAGIC calibrated data and computes the DL1 parameters. @@ -94,7 +93,7 @@ def magic_calib_to_dl1(input_file, output_dir, config, max_events, process_run=F # Load the input file logger.info(f"\nInput file: {input_file}") - event_source = MAGICEventSource(input_file, process_run=process_run, max_events=max_events) + event_source = MAGICEventSource(input_file, process_run=process_run) is_simulation = event_source.is_simulation logger.info(f"\nIs simulation: {is_simulation}") @@ -300,10 +299,10 @@ def magic_calib_to_dl1(input_file, output_dir, config, max_events, process_run=F # Reset the telescope IDs if tel_id == 1: - event_info.tel_id = 2 # MAGIC-I + event_info.tel_id = config["mc_tel_ids"]["MAGIC-I"] # MAGIC-I elif tel_id == 2: - event_info.tel_id = 3 # MAGIC-II + event_info.tel_id = config["mc_tel_ids"]["MAGIC-II"] # MAGIC-II # Save the parameters to the output file writer.write( @@ -315,13 +314,13 @@ def magic_calib_to_dl1(input_file, output_dir, config, max_events, process_run=F # Reset the telescope IDs of the subarray description tel_positions_magic = { - 2: subarray.positions[1], # MAGIC-I - 3: subarray.positions[2], # MAGIC-II + config["mc_tel_ids"]["MAGIC-I"]: subarray.positions[1], # MAGIC-I + config["mc_tel_ids"]["MAGIC-II"]: subarray.positions[2], # MAGIC-II } tel_descriptions_magic = { - 2: subarray.tel[1], # MAGIC-I - 3: subarray.tel[2], # MAGIC-II + config["mc_tel_ids"]["MAGIC-I"]: subarray.tel[1], # MAGIC-I + config["mc_tel_ids"]["MAGIC-II"]: subarray.tel[2], # MAGIC-II } subarray_magic = SubarrayDescription( @@ -371,15 +370,6 @@ def main(): help="Path to a configuration file", ) - parser.add_argument( - "--max-evt", - "-m", - dest="max_events", - type=int, - default=None, - help="Max. number of processed showers", - ) - parser.add_argument( "--process-run", dest="process_run", @@ -393,7 +383,7 @@ def main(): config = yaml.safe_load(f) # Process the input data - magic_calib_to_dl1(args.input_file, args.output_dir, config, args.max_events, args.process_run) + magic_calib_to_dl1(args.input_file, args.output_dir, config, args.process_run) logger.info("\nDone.") diff --git a/magicctapipe/scripts/lst1_magic/merge_hdf_files.py b/magicctapipe/scripts/lst1_magic/merge_hdf_files.py index c11c450d7..0e8f48414 100644 --- a/magicctapipe/scripts/lst1_magic/merge_hdf_files.py +++ b/magicctapipe/scripts/lst1_magic/merge_hdf_files.py @@ -2,7 +2,7 @@ # coding: utf-8 """ -This script merges the HDF files produced by the LST-1 + MAGIC combined +This script merges the HDF files produced by the LST + MAGIC combined analysis pipeline. It parses information from the file names, so they should follow the convention, i.e., *Run*.*.h5 or *run*.h5. @@ -13,8 +13,7 @@ If the `--run-wise` argument is given, it merges input files run-wise. It is applicable only to real data since MC data are already produced run-wise. The `--subrun-wise` argument can be also used to merge MAGIC -DL1 real data subrun-wise (for example, dl1_M1.Run05093711.001.h5 -+ dl1_M2.Run05093711.001.h5 -> dl1_MAGIC.Run05093711.001.h5). +DL1 real data subrun-wise. Usage: $ python merge_hdf_files.py @@ -22,6 +21,10 @@ (--output-dir dl1_merged) (--run-wise) (--subrun-wise) + +Broader usage: +This script is called automatically from the script "merging_runs_and_spliting_training_samples.py". +If you want to analyse a target, this is the way to go. See this other script for more details. """ import argparse diff --git a/magicctapipe/scripts/lst1_magic/merging_runs_and_splitting_training_samples.py b/magicctapipe/scripts/lst1_magic/merging_runs_and_splitting_training_samples.py new file mode 100644 index 000000000..621d112f4 --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/merging_runs_and_splitting_training_samples.py @@ -0,0 +1,269 @@ +""" +This script split the proton MC data sample into "train" +and "test", deletes possible failed runs (only those files +that end up with a size < 1 kB), and generates the bash +scripts to merge the data files calling the script "merge_hdf_files.py" +in the follwoing order: + +MAGIC: +1) Merge the subruns into runs for M1 and M2 individually. +2) Merge the runs of M1 and M2 into M1-M2 runs. +3) Merge all the M1-M2 runs for a given night. +Workingdir/DL1/Observations/Merged + +MC: +1) Merges all MC runs in a node and save them at +Workingdir/DL1/MC/PARTICLE/Merged + + +Usage: +$ python merging_runs_and_splitting_training_samples.py + +""" + +import os +import numpy as np +import glob +import yaml +import logging +from tqdm import tqdm +from pathlib import Path + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + +def cleaning(list_of_nodes, target_dir): + + """ + This function looks for failed runs in each node and remove them. + + Parameters + ---------- + target_dir: str + Path to the target directory. + list_of_nodes: array of str + List of nodes where the function will look for failed runs. + """ + + for i in tqdm(range(len(list_of_nodes)), desc="Cleaning failed runs"): + os.chdir(list_of_nodes[i]) + os.system('find . -type f -name "*.h5" -size -1k -delete') + + os.chdir(target_dir+"/../") + print("Cleaning done.") + +def split_train_test(target_dir, train_fraction): + + """ + This function splits the MC proton sample in 2, i.e. the "test" and the "train" subsamples. + It generates 2 subdirectories in the directory .../DL1/MC/protons named "test" and "train" and creates sub-sub-directories with the names of all nodes. + For each node sub-sub-directory we move 80% of the .h5 files (if it is in the "test" subdirectory) or 20% of the .h5 files (if it is in the "train" subdirectory). + + Parameters + ---------- + target_dir: str + Path to the working directory + train_fraction: float + Fraction of proton MC files to be used in the training RF dataset + """ + + proton_dir = target_dir+"/DL1/MC/protons" + + if not os.path.exists(proton_dir+"/train"): + os.mkdir(proton_dir+"/train") + if not os.path.exists(proton_dir+"/../protons_test"): + os.mkdir(proton_dir+"/../protons_test") + + list_of_dir = np.sort(glob.glob(proton_dir+'/node*' + os.path.sep)) + + for directory in tqdm(range(len(list_of_dir))): #tqdm allows us to print a progessbar in the terminal + if not os.path.exists(proton_dir+"/train/"+list_of_dir[directory].split("/")[-2]): + os.mkdir(proton_dir+"/train/"+list_of_dir[directory].split("/")[-2]) + if not os.path.exists(proton_dir+"/../protons_test/"+list_of_dir[directory].split("/")[-2]): + os.mkdir(proton_dir+"/../protons_test/"+list_of_dir[directory].split("/")[-2]) + list_of_runs = np.sort(glob.glob(proton_dir+"/"+list_of_dir[directory].split("/")[-2]+"/*.h5")) + split_percent = int(len(list_of_runs)*train_fraction) + for j in list_of_runs[0:split_percent]: + os.system(f"mv {j} {proton_dir}/train/"+list_of_dir[directory].split("/")[-2]) + + os.system(f"cp {list_of_dir[directory]}*.txt "+proton_dir+"/train/"+list_of_dir[directory].split("/")[-2]) + os.system(f"mv {list_of_dir[directory]}*.txt "+proton_dir+"/../protons_test/"+list_of_dir[directory].split("/")[-2]) + os.system(f"mv {list_of_dir[directory]}*.h5 "+proton_dir+"/../protons_test/"+list_of_dir[directory].split("/")[-2]) + os.system(f"rm -r {list_of_dir[directory]}") + +def merge(target_dir, identification, MAGIC_runs): + + """ + This function creates the bash scripts to run merge_hdf_files.py in all MAGIC subruns. + + Parameters + ---------- + target_dir: str + Path to the working directory + identification: str + Tells which batch to create. Options: subruns, M1M2, nights + MAGIC_runs: matrix of strings + This matrix is imported from config_general.yaml and tells the function where to find the data and where to put the merged files + """ + + process_name = "merging_"+target_dir.split("/")[-2:][1] + + MAGIC_DL1_dir = target_dir+"/DL1/Observations" + if os.path.exists(MAGIC_DL1_dir+"/M1") & os.path.exists(MAGIC_DL1_dir+"/M2"): + if not os.path.exists(MAGIC_DL1_dir+"/Merged"): + os.mkdir(MAGIC_DL1_dir+"/Merged") + + f = open(f"Merge_{identification}.sh","w") + f.write('#!/bin/sh\n\n') + f.write('#SBATCH -p short\n') + f.write('#SBATCH -J '+process_name+'\n') + f.write('#SBATCH -N 1\n\n') + f.write('ulimit -l unlimited\n') + f.write('ulimit -s unlimited\n') + f.write('ulimit -a\n\n') + + if identification == "0_subruns": + if os.path.exists(MAGIC_DL1_dir+"/M1"): + for i in MAGIC_runs: + if not os.path.exists(MAGIC_DL1_dir+f"/Merged/{i[0]}"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i[0]}") #Creating a merged directory for the respective night + if not os.path.exists(MAGIC_DL1_dir+f"/Merged/{i[0]}/{i[1]}"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}") #Creating a merged directory for the respective run + f.write(f'conda run -n magic-lst python merge_hdf_files.py --input-dir {MAGIC_DL1_dir}/M1/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} \n') + + if os.path.exists(MAGIC_DL1_dir+"/M2"): + for i in MAGIC_runs: + if not os.path.exists(MAGIC_DL1_dir+f"/Merged/{i[0]}"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i[0]}") #Creating a merged directory for the respective night + if not os.path.exists(MAGIC_DL1_dir+f"/Merged/{i[0]}/{i[1]}"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]}") #Creating a merged directory for the respective run + f.write(f'conda run -n magic-lst python merge_hdf_files.py --input-dir {MAGIC_DL1_dir}/M2/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} \n') + + elif identification == "1_M1M2": + if os.path.exists(MAGIC_DL1_dir+"/M1") & os.path.exists(MAGIC_DL1_dir+"/M2"): + for i in MAGIC_runs: + if not os.path.exists(MAGIC_DL1_dir+f"/Merged/{i[0]}/Merged"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/{i[0]}/Merged") + f.write(f'conda run -n magic-lst python merge_hdf_files.py --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/{i[1]} --output-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --run-wise \n') + else: + for i in MAGIC_runs: + if not os.path.exists(MAGIC_DL1_dir+f"/Merged/Merged_{i[0]}"): + os.mkdir(f"{MAGIC_DL1_dir}/Merged/Merged_{i[0]}") #Creating a merged directory for each night + f.write(f'conda run -n magic-lst python merge_hdf_files.py --input-dir {MAGIC_DL1_dir}/Merged/{i[0]}/Merged --output-dir {MAGIC_DL1_dir}/Merged/Merged_{i[0]} \n') + + + f.close() + + +def mergeMC(target_dir, identification): + + """ + This function creates the bash scripts to run merge_hdf_files.py in all MC runs. + + Parameters + ---------- + target_dir: str + Path to the working directory + identification: str + Tells which batch to create. Options: protons, gammadiffuse + """ + + process_name = "merging_"+target_dir.split("/")[-2:][1] + + MC_DL1_dir = target_dir+"/DL1/MC" + if not os.path.exists(MC_DL1_dir+f"/{identification}/Merged"): + os.mkdir(MC_DL1_dir+f"/{identification}/Merged") + + if identification == "protons": + list_of_nodes = np.sort(glob.glob(MC_DL1_dir+f"/{identification}/train/node*")) + else: + list_of_nodes = np.sort(glob.glob(MC_DL1_dir+f"/{identification}/node*")) + + np.savetxt(MC_DL1_dir+f"/{identification}/list_of_nodes.txt",list_of_nodes, fmt='%s') + + + process_size = len(list_of_nodes) - 1 + + cleaning(list_of_nodes, target_dir) #This will delete the (possibly) failed runs. + + f = open(f"Merge_{identification}.sh","w") + f.write('#!/bin/sh\n\n') + f.write('#SBATCH -p short\n') + f.write('#SBATCH -J '+process_name+'\n') + f.write(f"#SBATCH --array=0-{process_size}%50\n") + f.write('#SBATCH --mem=7g\n') + f.write('#SBATCH -N 1\n\n') + f.write('ulimit -l unlimited\n') + f.write('ulimit -s unlimited\n') + f.write('ulimit -a\n\n') + + f.write(f"SAMPLE_LIST=($(<{MC_DL1_dir}/{identification}/list_of_nodes.txt))\n") + f.write("SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n") + f.write(f'export LOG={MC_DL1_dir}/{identification}/Merged'+'/merged_${SLURM_ARRAY_TASK_ID}.log\n') + f.write(f'conda run -n magic-lst python merge_hdf_files.py --input-dir $SAMPLE --output-dir {MC_DL1_dir}/{identification}/Merged >$LOG 2>&1\n') + + f.close() + + +def main(): + + """ + Here we read the config_general.yaml file, split the pronton sample into "test" and "train", and merge the MAGIC files. + """ + + + with open("config_general.yaml", "rb") as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + + + target_dir = str(Path(config["directories"]["workspace_dir"]))+"/"+config["directories"]["target_name"] + + MAGIC_runs_and_dates = config["general"]["MAGIC_runs"] + MAGIC_runs = np.genfromtxt(MAGIC_runs_and_dates,dtype=str,delimiter=',') + + train_fraction = float(config["general"]["proton_train"]) + + + #Here we slice the proton MC data into "train" and "test": + print("***** Splitting protons into 'train' and 'test' datasets...") + split_train_test(target_dir, train_fraction) + + print("***** Generating merge bashscripts...") + merge(target_dir, "0_subruns", MAGIC_runs) #generating the bash script to merge the subruns + merge(target_dir, "1_M1M2", MAGIC_runs) #generating the bash script to merge the M1 and M2 runs + merge(target_dir, "2_nights", MAGIC_runs) #generating the bash script to merge all runs per night + + print("***** Generating mergeMC bashscripts...") + mergeMC(target_dir, "protons") #generating the bash script to merge the files + mergeMC(target_dir, "gammadiffuse") #generating the bash script to merge the files + mergeMC(target_dir, "gammas") #generating the bash script to merge the files + mergeMC(target_dir, "protons_test") + + + print("***** Running merge_hdf_files.py in the MAGIC data files...") + print("Process name: merging_"+target_dir.split("/")[-2:][1]) + print("To check the jobs submitted to the cluster, type: squeue -n merging_"+target_dir.split("/")[-2:][1]) + + #Below we run the bash scripts to merge the MAGIC files + list_of_merging_scripts = np.sort(glob.glob("Merge_*.sh")) + + for n,run in enumerate(list_of_merging_scripts): + if n == 0: + launch_jobs = f"merging{n}=$(sbatch --parsable {run})" + else: + launch_jobs = launch_jobs + f" && merging{n}=$(sbatch --parsable --dependency=afterany:$merging{n-1} {run})" + + #print(launch_jobs) + os.system(launch_jobs) + +if __name__ == "__main__": + main() + + + + + + + + diff --git a/magicctapipe/scripts/lst1_magic/setting_up_config_and_dir.py b/magicctapipe/scripts/lst1_magic/setting_up_config_and_dir.py new file mode 100644 index 000000000..c93f0cc7e --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/setting_up_config_and_dir.py @@ -0,0 +1,451 @@ +""" +This script facilitates the usage of other two scripts +of the MCP, i.e. "lst1_magic_mc_dl0_to_dl1.py" and +"magic_calib_to_dl1.py". This script is more like a +"manager" that organizes the analysis process by: +1) Creating the necessary directories and subdirectories. +2) Generatign all the bash script files that convert the +MAGIC and MC files from DL0 to DL1. +3) Launching these jobs in the IT container. + +Notice that in this stage we only use MAGIC + MC data. +No LST data is used here. + +Standard usage: +$ python setting_up_config_and_dir.py + +If you want to run only the MAGIC or only the MC conversion, +you can do as follows: + +Only MAGIC: +$ python setting_up_config_and_dir.py --partial-analysis onlyMAGIC + +Only MC: +$ python setting_up_config_and_dir.py --partial-analysis onlyMC + +""" + +import os +import numpy as np +import argparse +import glob +import time +import yaml +from pathlib import Path + +def config_file_gen(ids, target_dir): + + """ + Here we create the configuration file needed for transforming DL0 into DL1 + """ + + f = open(target_dir+'/config_DL0_to_DL1.yaml','w') + #f.write("directories:\n target: "+target_dir+"\n\n") + lines_of_config_file = [ + "mc_tel_ids:", + "\n LST-1: "+str(ids[0]), + "\n LST-2: "+str(ids[1]), + "\n LST-3: "+str(ids[2]), + "\n LST-4: "+str(ids[3]), + "\n MAGIC-I: "+str(ids[4]), + "\n MAGIC-II: "+str(ids[5]), + "\n", + "\nLST:", + "\n image_extractor:", + '\n type: "LocalPeakWindowSum"', + "\n window_shift: 4", + "\n window_width: 8", + "\n", + "\n increase_nsb:", + "\n use: true", + "\n extra_noise_in_dim_pixels: 1.27", + "\n extra_bias_in_dim_pixels: 0.665", + "\n transition_charge: 8", + "\n extra_noise_in_bright_pixels: 2.08", + "\n", + "\n increase_psf:", + "\n use: false", + "\n fraction: null", + "\n", + "\n tailcuts_clean:", + "\n picture_thresh: 8", + "\n boundary_thresh: 4", + "\n keep_isolated_pixels: false", + "\n min_number_picture_neighbors: 2", + "\n", + "\n time_delta_cleaning:", + "\n use: true", + "\n min_number_neighbors: 1", + "\n time_limit: 2", + "\n", + "\n dynamic_cleaning:", + "\n use: true", + "\n threshold: 267", + "\n fraction: 0.03", + "\n", + "\n use_only_main_island: false", + "\n", + "\nMAGIC:", + "\n image_extractor:", + '\n type: "SlidingWindowMaxSum"', + "\n window_width: 5", + "\n apply_integration_correction: false", + "\n", + "\n charge_correction:", + "\n use: true", + "\n factor: 1.143", + "\n", + "\n magic_clean:", + "\n use_time: true", + "\n use_sum: true", + "\n picture_thresh: 6", + "\n boundary_thresh: 3.5", + "\n max_time_off: 4.5", + "\n max_time_diff: 1.5", + "\n find_hotpixels: true", + '\n pedestal_type: "from_extractor_rndm"', + "\n", + "\n muon_ring:", + "\n thr_low: 25", + "\n tailcut: [12, 8]", + "\n ring_completeness_threshold: 25", + "\n"] + + f.writelines(lines_of_config_file) + f.close() + + +def lists_and_bash_generator(particle_type, target_dir, MC_path, SimTel_version, telescope_ids, focal_length): + + """ + This function creates the lists list_nodes_gamma_complete.txt and list_folder_gamma.txt with the MC file paths. + After that, it generates a few bash scripts to link the MC paths to each subdirectory. + These bash scripts will be called later in the main() function below. + """ + + process_name = target_dir.split("/")[-2:][1] + + list_of_nodes = glob.glob(MC_path+"/node*") + f = open(target_dir+f"/list_nodes_{particle_type}_complete.txt","w") # creating list_nodes_gammas_complete.txt + for i in list_of_nodes: + f.write(i+"/output_"+SimTel_version+"\n") + + f.close() + + f = open(target_dir+f"/list_folder_{particle_type}.txt","w") # creating list_folder_gammas.txt + for i in list_of_nodes: + f.write(i.split("/")[-1]+"\n") + + f.close() + + #################################################################################### + ############ bash scripts that link the MC paths to each subdirectory. + #################################################################################### + + f = open(f"linking_MC_{particle_type}_paths.sh","w") + lines_of_config_file = [ + "#!/bin/sh\n\n", + "#SBATCH -p short\n", + "#SBATCH -J "+process_name+"\n\n", + "#SBATCH -N 1\n\n", + "ulimit -l unlimited\n", + "ulimit -s unlimited\n", + "ulimit -a\n\n", + "while read -r -u 3 lineA && read -r -u 4 lineB\n", + "do\n", + " cd "+target_dir+f"/DL1/MC/{particle_type}\n", + " mkdir $lineB\n", + " cd $lineA\n", + " ls -lR *.gz |wc -l\n", + " ls *.gz > "+target_dir+f"/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n", + ' string=$lineA"/"\n', + " export file="+target_dir+f"/DL1/MC/{particle_type}/$lineB/list_dl0.txt\n\n", + " cat $file | while read line; do echo $string${line} >>"+target_dir+f"/DL1/MC/{particle_type}/$lineB/list_dl0_ok.txt; done\n\n", + ' echo "folder $lineB and node $lineA"\n', + 'done 3<"'+target_dir+f'/list_nodes_{particle_type}_complete.txt" 4<"'+target_dir+f'/list_folder_{particle_type}.txt"\n', + ""] + f.writelines(lines_of_config_file) + f.close() + + + ################################################################################################################ + ############################ bash script that applies lst1_magic_mc_dl0_to_dl1.py to all MC data files. + ################################################################################################################ + + number_of_nodes = glob.glob(MC_path+"/node*") + number_of_nodes = len(number_of_nodes) -1 + + f = open(f"linking_MC_{particle_type}_paths_r.sh","w") + lines_of_config_file = [ + '#!/bin/sh\n\n', + '#SBATCH -p xxl\n', + '#SBATCH -J '+process_name+'\n', + '#SBATCH --array=0-'+str(number_of_nodes)+'%50\n', + '#SBATCH --mem=10g\n', + '#SBATCH -N 1\n\n', + 'ulimit -l unlimited\n', + 'ulimit -s unlimited\n', + 'ulimit -a\n', + 'cd '+target_dir+f'/DL1/MC/{particle_type}\n\n', + 'export INF='+target_dir+'\n', + f'SAMPLE_LIST=($(<$INF/list_folder_{particle_type}.txt))\n', + 'SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n', + 'cd $SAMPLE\n\n', + 'export LOG='+target_dir+f'/DL1/MC/{particle_type}'+'/simtel_{$SAMPLE}_all.log\n', + 'cat list_dl0_ok.txt | while read line\n', + 'do\n', + ' cd '+target_dir+'/../\n', + ' conda run -n magic-lst python lst1_magic_mc_dl0_to_dl1.py --input-file $line --output-dir '+target_dir+f'/DL1/MC/{particle_type}/$SAMPLE --config-file '+target_dir+'/config_DL0_to_DL1.yaml >>$LOG 2>&1 --focal_length_choice '+focal_length+'\n\n', + 'done\n', + ""] + f.writelines(lines_of_config_file) + f.close() + + + + +def lists_and_bash_gen_MAGIC(target_dir, telescope_ids, MAGIC_runs): + + """ + Below we create a bash script that links the the MAGIC data paths to each subdirectory. + """ + + process_name = target_dir.split("/")[-2:][1] + + f = open("linking_MAGIC_data_paths.sh","w") + f.write('#!/bin/sh\n\n') + f.write('#SBATCH -p short\n') + f.write('#SBATCH -J '+process_name+'\n') + f.write('#SBATCH -N 1\n\n') + f.write('ulimit -l unlimited\n') + f.write('ulimit -s unlimited\n') + f.write('ulimit -a\n') + + if telescope_ids[-1] > 0: + for i in MAGIC_runs: + f.write('export IN1=/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/'+i[0].split("_")[0]+"/"+i[0].split("_")[1]+"/"+i[0].split("_")[2]+'\n') + f.write('export OUT1='+target_dir+'/DL1/Observations/M2/'+i[0]+'/'+i[1]+'\n') + f.write('ls $IN1/*'+i[1][-2:]+'.*_Y_*.root > $OUT1/list_dl0.txt\n') + + f.write('\n') + if telescope_ids[-2] > 0: + for i in MAGIC_runs: + f.write('export IN1=/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/'+i[0].split("_")[0]+"/"+i[0].split("_")[1]+"/"+i[0].split("_")[2]+'\n') + f.write('export OUT1='+target_dir+'/DL1/Observations/M1/'+i[0]+'/'+i[1]+'\n') + f.write('ls $IN1/*'+i[1][-2:]+'.*_Y_*.root > $OUT1/list_dl0.txt\n') + + f.close() + + if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + for i in MAGIC_runs: + if telescope_ids[-1] > 0: + + number_of_nodes = glob.glob('/fefs/onsite/common/MAGIC/data/M2/event/Calibrated/'+i[0].split("_")[0]+"/"+i[0].split("_")[1]+"/"+i[0].split("_")[2]+f'/*{i[1]}.*_Y_*.root') + number_of_nodes = len(number_of_nodes) - 1 + + f = open(f"MAGIC-II_dl0_to_dl1_run_{i[1]}.sh","w") + lines_of_config_file = [ + '#!/bin/sh\n\n', + '#SBATCH -p long\n', + '#SBATCH -J '+process_name+'\n', + '#SBATCH --array=0-'+str(number_of_nodes)+'\n', + '#SBATCH -N 1\n\n', + 'ulimit -l unlimited\n', + 'ulimit -s unlimited\n', + 'ulimit -a\n\n', + 'export OUTPUTDIR='+target_dir+'/DL1/Observations/M2/'+i[0]+'/'+i[1]+'\n', + 'cd '+target_dir+'/../\n', + 'SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n', + 'SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n', + 'export LOG=$OUTPUTDIR/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n', + 'conda run -n magic-lst python magic_calib_to_dl1.py --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file '+target_dir+'/config_DL0_to_DL1.yaml >$LOG 2>&1\n', + ""] + f.writelines(lines_of_config_file) + f.close() + + if telescope_ids[-2] > 0: + + number_of_nodes = glob.glob('/fefs/onsite/common/MAGIC/data/M1/event/Calibrated/'+i[0].split("_")[0]+"/"+i[0].split("_")[1]+"/"+i[0].split("_")[2]+f'/*{i[1]}.*_Y_*.root') + number_of_nodes = len(number_of_nodes) - 1 + + f = open(f"MAGIC-I_dl0_to_dl1_run_{i[1]}.sh","w") + lines_of_config_file = [ + '#!/bin/sh\n\n', + '#SBATCH -p long\n', + '#SBATCH -J '+process_name+'\n', + '#SBATCH --array=0-'+str(number_of_nodes)+'\n', + '#SBATCH -N 1\n\n', + 'ulimit -l unlimited\n', + 'ulimit -s unlimited\n', + 'ulimit -a\n\n', + 'export OUTPUTDIR='+target_dir+'/DL1/Observations/M1/'+i[0]+'/'+i[1]+'\n', + 'cd '+target_dir+'/../\n', + 'SAMPLE_LIST=($(<$OUTPUTDIR/list_dl0.txt))\n', + 'SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n\n', + 'export LOG=$OUTPUTDIR/real_0_1_task${SLURM_ARRAY_TASK_ID}.log\n', + 'conda run -n magic-lst python magic_calib_to_dl1.py --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file '+target_dir+'/config_DL0_to_DL1.yaml >$LOG 2>&1\n', + ""] + f.writelines(lines_of_config_file) + f.close() + + +def directories_generator(target_dir, telescope_ids,MAGIC_runs): + + """ + Here we create all subdirectories for a given workspace and target name. + """ + + ########################################### + ##################### MC + ########################################### + + if not os.path.exists(target_dir): + os.mkdir(target_dir) + os.mkdir(target_dir+"/DL1") + os.mkdir(target_dir+"/DL1/Observations") + os.mkdir(target_dir+"/DL1/MC") + os.mkdir(target_dir+"/DL1/MC/gammas") + os.mkdir(target_dir+"/DL1/MC/gammadiffuse") + os.mkdir(target_dir+"/DL1/MC/electrons") + os.mkdir(target_dir+"/DL1/MC/protons") + os.mkdir(target_dir+"/DL1/MC/helium") + else: + overwrite = input("MC directory for "+target_dir.split("/")[-1]+" already exists. Would you like to overwrite it? [only 'y' or 'n']: ") + if overwrite == "y": + os.system("rm -r "+target_dir) + os.mkdir(target_dir) + os.mkdir(target_dir+"/DL1") + os.mkdir(target_dir+"/DL1/Observations") + os.mkdir(target_dir+"/DL1/MC") + os.mkdir(target_dir+"/DL1/MC/gammas") + os.mkdir(target_dir+"/DL1/MC/gammadiffuse") + os.mkdir(target_dir+"/DL1/MC/electrons") + os.mkdir(target_dir+"/DL1/MC/protons") + os.mkdir(target_dir+"/DL1/MC/helium") + else: + print("Directory not modified.") + + + + ########################################### + ##################### MAGIC + ########################################### + + if telescope_ids[-1] > 0: + if not os.path.exists(target_dir+"/DL1/Observations/M2"): + os.mkdir(target_dir+"/DL1/Observations/M2") + for i in MAGIC_runs: + if not os.path.exists(target_dir+"/DL1/Observations/M2/"+i[0]): + os.mkdir(target_dir+"/DL1/Observations/M2/"+i[0]) + os.mkdir(target_dir+"/DL1/Observations/M2/"+i[0]+"/"+i[1]) + else: + os.mkdir(target_dir+"/DL1/Observations/M2/"+i[0]+"/"+i[1]) + + if telescope_ids[-2] > 0: + if not os.path.exists(target_dir+"/DL1/Observations/M1"): + os.mkdir(target_dir+"/DL1/Observations/M1") + for i in MAGIC_runs: + if not os.path.exists(target_dir+"/DL1/Observations/M1/"+i[0]): + os.mkdir(target_dir+"/DL1/Observations/M1/"+i[0]) + os.mkdir(target_dir+"/DL1/Observations/M1/"+i[0]+"/"+i[1]) + else: + os.mkdir(target_dir+"/DL1/Observations/M1/"+i[0]+"/"+i[1]) + + + + + +def main(): + + """ Here we read the config_general.yaml file and call the functions to generate the necessary directories, bash scripts and launching the jobs.""" + + parser = argparse.ArgumentParser() + + #Here we are simply collecting the parameters from the command line, as input file, output directory, and configuration file + parser.add_argument( + "--partial-analysis", + "-p", + dest="partial_analysis", + type=str, + default="doEverything", + help="You can type 'onlyMAGIC' or 'onlyMC' to run this script only on MAGIC or MC data, respectively.", + ) + + args = parser.parse_args() + + + + with open("config_general.yaml", "rb") as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + + + #Below we read the telescope IDs and runs + telescope_ids = list(config["mc_tel_ids"].values()) + SimTel_version = config["general"]["SimTel_version"] + MAGIC_runs_and_dates = config["general"]["MAGIC_runs"] + MAGIC_runs = np.genfromtxt(MAGIC_runs_and_dates,dtype=str,delimiter=',') #READ LIST OF DATES AND RUNS: format table in a way that each line looks like "2020_11_19,5093174" + focal_length = config["general"]["focal_length"] + + #Below we read the data paths + target_dir = str(Path(config["directories"]["workspace_dir"]))+"/"+config["directories"]["target_name"] + MC_gammas = str(Path(config["directories"]["MC_gammas"])) + MC_electrons = str(Path(config["directories"]["MC_electrons"])) + MC_helium = str(Path(config["directories"]["MC_helium"])) + MC_protons = str(Path(config["directories"]["MC_protons"])) + MC_gammadiff = str(Path(config["directories"]["MC_gammadiff"])) + + + print("***** Linking MC paths - this may take a few minutes ******") + print("*** Reducing DL0 to DL1 data - this can take many hours ***") + print("Process name: ",target_dir.split('/')[-2:][1]) + print("To check the jobs submitted to the cluster, type: squeue -n",target_dir.split('/')[-2:][1]) + + directories_generator(target_dir, telescope_ids, MAGIC_runs) #Here we create all the necessary directories in the given workspace and collect the main directory of the target + config_file_gen(telescope_ids,target_dir) + + #Below we run the analysis on the MC data + if not args.partial_analysis=='onlyMAGIC': + lists_and_bash_generator("gammas", target_dir, MC_gammas, SimTel_version, telescope_ids, focal_length) #gammas + #lists_and_bash_generator("electrons", target_dir, MC_electrons, SimTel_version, telescope_ids, focal_length) #electrons + #lists_and_bash_generator("helium", target_dir, MC_helium, SimTel_version, telescope_ids, focal_length) #helium + lists_and_bash_generator("protons", target_dir, MC_protons, SimTel_version, telescope_ids, focal_length) #protons + lists_and_bash_generator("gammadiffuse", target_dir, MC_gammadiff, SimTel_version, telescope_ids, focal_length) #gammadiffuse + + #Here we do the MC DL0 to DL1 conversion: + list_of_MC = glob.glob("linking_MC_*s.sh") + + #os.system("RES=$(sbatch --parsable linking_MC_gammas_paths.sh) && sbatch --dependency=afterok:$RES MC_dl0_to_dl1.sh") + + for n,run in enumerate(list_of_MC): + if n == 0: + launch_jobs_MC = f"linking{n}=$(sbatch --parsable {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + else: + launch_jobs_MC = launch_jobs_MC + f" && linking{n}=$(sbatch --parsable --dependency=afterany:$running{n-1} {run}) && running{n}=$(sbatch --parsable --dependency=afterany:$linking{n} {run[0:-3]}_r.sh)" + + + os.system(launch_jobs_MC) + + #Below we run the analysis on the MAGIC data + if not args.partial_analysis=='onlyMC': + lists_and_bash_gen_MAGIC(target_dir, telescope_ids, MAGIC_runs) #MAGIC real data + if (telescope_ids[-2] > 0) or (telescope_ids[-1] > 0): + + list_of_MAGIC_runs = glob.glob("MAGIC-*.sh") + + for n,run in enumerate(list_of_MAGIC_runs): + if n == 0: + launch_jobs = f"linking=$(sbatch --parsable linking_MAGIC_data_paths.sh) && RES{n}=$(sbatch --parsable --dependency=afterany:$linking {run})" + else: + launch_jobs = launch_jobs + f" && RES{n}=$(sbatch --parsable --dependency=afterany:$RES{n-1} {run})" + + os.system(launch_jobs) + +if __name__ == "__main__": + main() + + + + + + + diff --git a/magicctapipe/scripts/lst1_magic/stereo_events.py b/magicctapipe/scripts/lst1_magic/stereo_events.py new file mode 100644 index 000000000..4227a857c --- /dev/null +++ b/magicctapipe/scripts/lst1_magic/stereo_events.py @@ -0,0 +1,183 @@ +""" +This scripts generates and runs the bashscripts +to compute the stereo parameters of DL1 MC and +Coincident MAGIC+LST data files. + +Usage: +$ python stereo_events.py + +""" + +import os +import numpy as np +import glob +import yaml +import logging +from pathlib import Path + +logger = logging.getLogger(__name__) +logger.addHandler(logging.StreamHandler()) +logger.setLevel(logging.INFO) + +def configfile_stereo(ids, target_dir): + + """ + This function creates the configuration file needed for the event stereo step + + Parameters + ---------- + ids: list + list of telescope IDs + target_dir: str + Path to the working directory + """ + + f = open(target_dir+'/config_stereo.yaml','w') + f.write("mc_tel_ids:\n LST-1: "+str(ids[0])+"\n LST-2: "+str(ids[1])+"\n LST-3: "+str(ids[2])+"\n LST-4: "+str(ids[3])+"\n MAGIC-I: "+str(ids[4])+"\n MAGIC-II: "+str(ids[5])+"\n\n") + f.write('stereo_reco:\n quality_cuts: "(intensity > 50) & (width > 0)"\n theta_uplim: "6 arcmin"\n') + f.close() + + +def bash_stereo(target_dir): + + """ + This function generates the bashscript for running the stereo analysis. + + Parameters + ---------- + target_dir: str + Path to the working directory + """ + + process_name = target_dir.split("/")[-2:][1] + + if not os.path.exists(target_dir+"/DL1/Observations/Coincident_stereo"): + os.mkdir(target_dir+"/DL1/Observations/Coincident_stereo") + + listOfNightsLST = np.sort(glob.glob(target_dir+"/DL1/Observations/Coincident/*")) + + for nightLST in listOfNightsLST: + stereoDir = target_dir+"/DL1/Observations/Coincident_stereo/"+nightLST.split('/')[-1] + if not os.path.exists(stereoDir): + os.mkdir(stereoDir) + + os.system(f"ls {nightLST}/*LST*.h5 > {nightLST}/list_coin.txt") #generating a list with the DL1 coincident data files. + process_size = len(np.genfromtxt(nightLST+"/list_coin.txt",dtype="str")) - 1 + + f = open(f"StereoEvents_{nightLST.split('/')[-1]}.sh","w") + f.write("#!/bin/sh\n\n") + f.write("#SBATCH -p short\n") + f.write("#SBATCH -J "+process_name+"_stereo\n") + f.write(f"#SBATCH --array=0-{process_size}%100\n") + f.write("#SBATCH -N 1\n\n") + f.write("ulimit -l unlimited\n") + f.write("ulimit -s unlimited\n") + f.write("ulimit -a\n\n") + + f.write(f"export INPUTDIR={nightLST}\n") + f.write(f"export OUTPUTDIR={stereoDir}\n") + f.write("SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n") + f.write("SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n") + f.write("export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_TASK_ID}.log\n") + f.write(f"conda run -n magic-lst python lst1_magic_stereo_reco.py --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_stereo.yaml >$LOG 2>&1") + f.close() + +def bash_stereoMC(target_dir, identification): + + """ + This function generates the bashscript for running the stereo analysis. + + Parameters + ---------- + target_dir: str + Path to the working directory + identification: str + Particle name. Options: protons, gammadiffuse + """ + + process_name = target_dir.split("/")[-2:][1] + + if not os.path.exists(target_dir+f"/DL1/MC/{identification}/Merged/StereoMerged"): + os.mkdir(target_dir+f"/DL1/MC/{identification}/Merged/StereoMerged") + + inputdir = target_dir+f"/DL1/MC/{identification}/Merged" + + os.system(f"ls {inputdir}/dl1*.h5 > {inputdir}/list_coin.txt") #generating a list with the DL1 coincident data files. + process_size = len(np.genfromtxt(inputdir+"/list_coin.txt",dtype="str")) - 1 + + f = open(f"StereoEvents_{identification}.sh","w") + f.write("#!/bin/sh\n\n") + f.write("#SBATCH -p xxl\n") + f.write("#SBATCH -J "+process_name+"_stereo\n") + f.write(f"#SBATCH --array=0-{process_size}%100\n") + f.write('#SBATCH --mem=30g\n') + f.write("#SBATCH -N 1\n\n") + f.write("ulimit -l unlimited\n") + f.write("ulimit -s unlimited\n") + f.write("ulimit -a\n\n") + + f.write(f"export INPUTDIR={inputdir}\n") + f.write(f"export OUTPUTDIR={inputdir}/StereoMerged\n") + f.write("SAMPLE_LIST=($(<$INPUTDIR/list_coin.txt))\n") + f.write("SAMPLE=${SAMPLE_LIST[${SLURM_ARRAY_TASK_ID}]}\n") + f.write("export LOG=$OUTPUTDIR/stereo_${SLURM_ARRAY_TASK_ID}.log\n") + f.write(f"conda run -n magic-lst python lst1_magic_stereo_reco.py --input-file $SAMPLE --output-dir $OUTPUTDIR --config-file {target_dir}/config_stereo.yaml >$LOG 2>&1") + f.close() + + + + + +def main(): + + """ + Here we read the config_general.yaml file and call the functions defined above. + """ + + + with open("config_general.yaml", "rb") as f: # "rb" mode opens the file in binary format for reading + config = yaml.safe_load(f) + + + target_dir = str(Path(config["directories"]["workspace_dir"]))+"/"+config["directories"]["target_name"] + telescope_ids = list(config["mc_tel_ids"].values()) + + print("***** Generating file config_stereo.yaml...") + print("***** This file can be found in ",target_dir) + configfile_stereo(telescope_ids, target_dir) + + print("***** Generating the bashscript...") + bash_stereo(target_dir) + + print("***** Generating the bashscript for MCs...") + bash_stereoMC(target_dir,"gammadiffuse") + bash_stereoMC(target_dir,"gammas") + bash_stereoMC(target_dir,"protons") + bash_stereoMC(target_dir,"protons_test") + + print("***** Submitting processes to the cluster...") + print("Process name: "+target_dir.split("/")[-2:][1]+"_stereo") + print("To check the jobs submitted to the cluster, type: squeue -n "+target_dir.split("/")[-2:][1]+"_stereo") + + #Below we run the bash scripts to find the stereo events + list_of_stereo_scripts = np.sort(glob.glob("StereoEvents_*.sh")) + + for n,run in enumerate(list_of_stereo_scripts): + if n == 0: + launch_jobs = f"stereo{n}=$(sbatch --parsable {run})" + else: + launch_jobs = launch_jobs + f" && stereo{n}=$(sbatch --parsable --dependency=afterany:$stereo{n-1} {run})" + + #print(launch_jobs) + os.system(launch_jobs) + +if __name__ == "__main__": + main() + + + + + + + +