diff --git a/magicctapipe/conftest.py b/magicctapipe/conftest.py index aba518d71..fe73d724c 100644 --- a/magicctapipe/conftest.py +++ b/magicctapipe/conftest.py @@ -248,6 +248,64 @@ def temp_DL3_monly(tmp_path_factory): """ +@pytest.fixture(scope="session") +def query_test_1(temp_DL2_test): + """ + Toy DL2 + """ + path = temp_DL2_test / "query_test_1.h5" + data = Table() + data["event_id"] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + data["combo_type"] = [1, 3, 3, 2, 2, 0, 1, 2, 3, 0, 1, 2] + data["magic_stereo"] = [ + True, + True, + False, + True, + False, + True, + False, + False, + True, + False, + True, + True, + ] + write_table_hdf5( + data, str(path), "/events/parameters", overwrite=True, serialize_meta=False + ) + return path + + +@pytest.fixture(scope="session") +def query_test_2(temp_DL2_test): + """ + Toy DL2 + """ + path = temp_DL2_test / "query_test_2.h5" + data = Table() + data["event_id"] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + data["combo_type"] = [1, 6, 3, 4, 2, 3, 1, 2, 3, 5, 1, 2] + data["magic_stereo"] = [ + True, + True, + False, + True, + False, + True, + False, + False, + True, + False, + True, + True, + ] + write_table_hdf5( + data, str(path), "/events/parameters", overwrite=True, serialize_meta=False + ) + return path + + @pytest.fixture(scope="session") def dl2_test(temp_DL2_test): """ @@ -652,7 +710,7 @@ def RF_monly(gamma_stereo_monly, p_stereo_monly, temp_rf_monly, config_monly): @pytest.fixture(scope="session") -def gamma_dl2(temp_DL1_gamma_test, RF, temp_DL2_gamma): +def gamma_dl2(temp_DL1_gamma_test, RF, temp_DL2_gamma, config): """ Produce a DL2 file """ @@ -664,6 +722,7 @@ def gamma_dl2(temp_DL1_gamma_test, RF, temp_DL2_gamma): f"-d{str(file)}", f"-r{str(RF)}", f"-o{str(temp_DL2_gamma)}", + f"-c{str(config)}", ] ) @@ -686,7 +745,9 @@ def gamma_dl2(temp_DL1_gamma_test, RF, temp_DL2_gamma): @pytest.fixture(scope="session") -def gamma_dl2_monly(temp_DL1_gamma_test_monly, RF_monly, temp_DL2_gamma_monly): +def gamma_dl2_monly( + temp_DL1_gamma_test_monly, RF_monly, temp_DL2_gamma_monly, config_monly +): """ Produce a DL2 file """ @@ -698,6 +759,7 @@ def gamma_dl2_monly(temp_DL1_gamma_test_monly, RF_monly, temp_DL2_gamma_monly): f"-d{str(file)}", f"-r{str(RF_monly)}", f"-o{str(temp_DL2_gamma_monly)}", + f"-c{str(config_monly)}", ] ) @@ -755,7 +817,7 @@ def IRF_monly(gamma_dl2_monly, config_monly, temp_irf_monly): @pytest.fixture(scope="session") -def p_dl2(temp_DL1_p_test, RF, temp_DL2_p): +def p_dl2(temp_DL1_p_test, RF, temp_DL2_p, config): """ Produce a DL2 file """ @@ -767,13 +829,14 @@ def p_dl2(temp_DL1_p_test, RF, temp_DL2_p): f"-d{str(file)}", f"-r{str(RF)}", f"-o{str(temp_DL2_p)}", + f"-c{str(config)}", ] ) return temp_DL2_p @pytest.fixture(scope="session") -def p_dl2_monly(temp_DL1_p_test_monly, RF_monly, temp_DL2_p_monly): +def p_dl2_monly(temp_DL1_p_test_monly, RF_monly, temp_DL2_p_monly, config_monly): """ Produce a DL2 file """ @@ -785,6 +848,7 @@ def p_dl2_monly(temp_DL1_p_test_monly, RF_monly, temp_DL2_p_monly): f"-d{str(file)}", f"-r{str(RF_monly)}", f"-o{str(temp_DL2_p_monly)}", + f"-c{str(config_monly)}", ] ) return temp_DL2_p_monly @@ -964,7 +1028,7 @@ def stereo_monly(merge_magic_monly, temp_stereo_monly, config_monly): @pytest.fixture(scope="session") -def real_dl2(coincidence_stereo, RF, temp_DL2_real): +def real_dl2(coincidence_stereo, RF, temp_DL2_real, config): """ Produce a DL2 file """ @@ -976,13 +1040,14 @@ def real_dl2(coincidence_stereo, RF, temp_DL2_real): f"-d{str(file)}", f"-r{str(RF)}", f"-o{str(temp_DL2_real)}", + f"-c{str(config)}", ] ) return temp_DL2_real @pytest.fixture(scope="session") -def real_dl2_monly(stereo_monly, RF_monly, temp_DL2_real_monly): +def real_dl2_monly(stereo_monly, RF_monly, temp_DL2_real_monly, config_monly): """ Produce a DL2 file """ @@ -994,6 +1059,7 @@ def real_dl2_monly(stereo_monly, RF_monly, temp_DL2_real_monly): f"-d{str(file)}", f"-r{str(RF_monly)}", f"-o{str(temp_DL2_real_monly)}", + f"-c{str(config_monly)}", ] ) return temp_DL2_real_monly diff --git a/magicctapipe/io/__init__.py b/magicctapipe/io/__init__.py index e1cf9fcc1..a852c5667 100644 --- a/magicctapipe/io/__init__.py +++ b/magicctapipe/io/__init__.py @@ -17,14 +17,13 @@ format_object, get_dl2_mean, get_stereo_events, - get_stereo_events_old, load_dl2_data_file, load_irf_files, load_lst_dl1_data_file, load_magic_dl1_data_files, load_mc_dl2_data_file, - load_train_data_files, load_train_data_files_tel, + query_data, resource_file, save_pandas_data_in_table, telescope_combinations, @@ -42,14 +41,13 @@ "format_object", "get_dl2_mean", "get_stereo_events", - "get_stereo_events_old", "load_dl2_data_file", "load_irf_files", "load_lst_dl1_data_file", "load_magic_dl1_data_files", "load_mc_dl2_data_file", - "load_train_data_files", "load_train_data_files_tel", + "query_data", "save_pandas_data_in_table", "telescope_combinations", "resource_file", diff --git a/magicctapipe/io/gadf.py b/magicctapipe/io/gadf.py index 1cf9bb196..2bb0a9275 100644 --- a/magicctapipe/io/gadf.py +++ b/magicctapipe/io/gadf.py @@ -13,6 +13,7 @@ from .. import __version__ as MCP_VERSION from ..utils.functions import HEIGHT_ORM, LAT_ORM, LON_ORM +from .io import telescope_combinations __all__ = [ "create_gh_cuts_hdu", @@ -88,7 +89,7 @@ def create_gh_cuts_hdu(gh_cuts, reco_energy_bins, fov_offset_bins, **header_card def create_event_hdu( - event_table, on_time, deadc, source_name, source_ra=None, source_dec=None + event_table, config, on_time, deadc, source_name, source_ra=None, source_dec=None ): """ Creates a fits binary table HDU for shower events. @@ -97,6 +98,8 @@ def create_event_hdu( ---------- event_table : astropy.table.table.QTable Table of the DL2 events surviving gammaness cuts + config : dict + Dictionary with information about the telescope IDs. on_time : astropy.table.table.QTable ON time of the input data deadc : float @@ -123,12 +126,7 @@ def create_event_hdu( If the source name cannot be resolved and also either or both of source RA/Dec coordinate is set to None """ - TEL_COMBINATIONS = { - "LST1_M1": [1, 2], # combo_type = 0 - "LST1_M1_M2": [1, 2, 3], # combo_type = 1 - "LST1_M2": [1, 3], # combo_type = 2 - "M1_M2": [2, 3], # combo_type = 3 - } # TODO: REMOVE WHEN SWITCHING TO THE NEW RFs IMPLEMENTTATION (1 RF PER TELESCOPE) + _, TEL_COMBINATIONS = telescope_combinations(config) mjdreff, mjdrefi = np.modf(MJDREF.mjd) time_start = Time(event_table["timestamp"][0], format="unix", scale="utc") diff --git a/magicctapipe/io/io.py b/magicctapipe/io/io.py index 11ea135af..df7fcb03d 100644 --- a/magicctapipe/io/io.py +++ b/magicctapipe/io/io.py @@ -35,14 +35,13 @@ "format_object", "get_dl2_mean", "get_stereo_events", - "get_stereo_events_old", "load_dl2_data_file", "load_irf_files", "load_lst_dl1_data_file", "load_magic_dl1_data_files", "load_mc_dl2_data_file", - "load_train_data_files", "load_train_data_files_tel", + "query_data", "save_pandas_data_in_table", "telescope_combinations", "resource_file", @@ -69,6 +68,73 @@ DEAD_TIME_MAGIC = 26 * u.us +def query_data(df_events, event_type, magic_only, three_or_more, is_mc): + """ + Function to select data (both real and MC) according to the event_type + + Parameters + ---------- + + df_events : pandas.core.frame.DataFrame + Dataframe of the events + event_type : str + Type of events to be selected + 'software' : Stadard MAGIC+LST-1 case; remove magic-only events + 'trigger_3tels_or_more' : at least three telescopes triggered + 'trigger_no_magic_stereo' : no need for both MAGIC to trigger, to be used if more than one LST is used; remove MAGIC-only data if they exist + 'magic_only' : only M1_M2 events selected + 'hardware' : hardware trigger + magic_only : int or None + Index of the M1_M2 combination (if it exist, otherwise it is equal to None) + three_or_more : list + List of combination indexes for combinations with at least 3 telescopes + is_mc : bool + True if this function is called on MC + + Returns + ------- + + pandas.core.frame.DataFrame + Dataframe of events after query + """ + if event_type == "software": + if magic_only is not None: + + if is_mc: + df_events.query( + f"(combo_type != {magic_only}) & (magic_stereo == True)", + inplace=True, + ) + else: + df_events.query(f"combo_type != {magic_only}", inplace=True) + + else: + logger.warning( + 'Requested event type and provided telescopes IDs are not consistent; "software" must be used in case of standard MAGIC+LST-1 analyses' + ) + return + + elif event_type == "trigger_3tels_or_more": + df_events.query(f"combo_type == {three_or_more}", inplace=True) + + elif event_type == "trigger_no_magic_stereo": + if magic_only is not None: + df_events.query(f"combo_type != {magic_only}", inplace=True) + + elif event_type == "magic_only": + if magic_only is not None: + df_events.query(f"combo_type == {magic_only}", inplace=True) + else: + logger.warning( + "MAGIC-only analysis requested, but inconsistent with the provided telescope IDs: check the configuration file" + ) + return + elif event_type != "hardware": + raise ValueError(f"Unknown event type '{event_type}'.") + + return df_events + + def check_input_list(config): """ This function checks if the input telescope list is organized as follows: @@ -249,89 +315,6 @@ def format_object(input_object): return string -def get_stereo_events_old( - event_data, quality_cuts=None, group_index=["obs_id", "event_id"] -): - """ - Gets the stereo events surviving specified quality cuts. - - It also adds the telescope multiplicity `multiplicity` and - combination types `combo_type` to the output data frame. - - Parameters - ---------- - event_data : pandas.DataFrame - Data frame of shower events - quality_cuts : str, optional - Quality cuts applied to the input data - group_index : list, optional - Index to group telescope events - - Returns - ------- - pandas.DataFrame - Data frame of the stereo events surviving the quality cuts - """ - TEL_COMBINATIONS = { - "LST1_M1": [1, 2], # combo_type = 0 - "LST1_M1_M2": [1, 2, 3], # combo_type = 1 - "LST1_M2": [1, 3], # combo_type = 2 - "M1_M2": [2, 3], # combo_type = 3 - } # TODO: REMOVE WHEN SWITCHING TO THE NEW RFs IMPLEMENTTATION (1 RF PER TELESCOPE) - event_data_stereo = event_data.copy() - - # Apply the quality cuts - if quality_cuts is not None: - event_data_stereo.query(quality_cuts, inplace=True) - - # Extract stereo events - event_data_stereo["multiplicity"] = event_data_stereo.groupby(group_index).size() - event_data_stereo.query("multiplicity == [2, 3]", inplace=True) - - # Check the total number of events - n_events_total = len(event_data_stereo.groupby(group_index).size()) - logger.info(f"\nIn total {n_events_total} stereo events are found:") - - n_events_per_combo = {} - - # Loop over every telescope combination type - for combo_type, (tel_combo, tel_ids) in enumerate(TEL_COMBINATIONS.items()): - multiplicity = len(tel_ids) - - df_events = event_data_stereo.query( - f"(tel_id == {tel_ids}) & (multiplicity == {multiplicity})" - ).copy() - - # Here we recalculate the multiplicity and apply the cut again, - # since with the above cut the events belonging to other - # combination types are also extracted. For example, in case of - # tel_id = [1, 2], the tel 1 events of the combination [1, 3] - # and the tel 2 events of the combination [2, 3] remain in the - # data frame, whose multiplicity will be recalculated as 1 and - # so will be removed with the following cuts. - - df_events["multiplicity"] = df_events.groupby(group_index).size() - df_events.query(f"multiplicity == {multiplicity}", inplace=True) - - # Assign the combination type - event_data_stereo.loc[df_events.index, "combo_type"] = combo_type - - n_events = len(df_events.groupby(group_index).size()) - percentage = 100 * n_events / n_events_total - - key = f"{tel_combo} (type {combo_type})" - value = f"{n_events:.0f} events ({percentage:.1f}%)" - - n_events_per_combo[key] = value - - event_data_stereo = event_data_stereo.astype({"combo_type": int}) - - # Show the number of events per combination type - logger.info(format_object(n_events_per_combo)) - - return event_data_stereo - - def get_stereo_events( event_data, config, @@ -347,7 +330,7 @@ def get_stereo_events( Parameters ---------- - event_data : pandas.DataFrame + event_data : pandas.core.frame.DataFrame Data frame of shower events config : dict Read from the yaml file with information about the telescope IDs. @@ -360,7 +343,7 @@ def get_stereo_events( Returns ------- - pandas.DataFrame + pandas.core.frame.DataFrame Data frame of the stereo events surviving the quality cuts """ @@ -436,7 +419,7 @@ def get_dl2_mean(event_data, weight_type="simple", group_index=["obs_id", "event Parameters ---------- - event_data : pandas.DataFrame + event_data : pandas.core.frame.DataFrame Data frame of shower events weight_type : str, optional Type of the weights for telescope-wise DL2 parameters - @@ -448,7 +431,7 @@ def get_dl2_mean(event_data, weight_type="simple", group_index=["obs_id", "event Returns ------- - pandas.DataFrame + pandas.core.frame.DataFrame Data frame of the shower events with mean DL2 parameters Raises @@ -558,7 +541,7 @@ def load_lst_dl1_data_file(input_file): Returns ------- - event_data : pandas.DataFrame + event_data : pandas.core.frame.DataFrame Data frame of LST-1 events subarray : ctapipe.instrument.subarray.SubarrayDescription LST-1 subarray description @@ -653,7 +636,7 @@ def load_magic_dl1_data_files(input_dir, config): Returns ------- - event_data : pandas.DataFrame + event_data : pandas.core.frame.DataFrame Dataframe of MAGIC events subarray : ctapipe.instrument.subarray.SubarrayDescription MAGIC subarray description @@ -721,96 +704,6 @@ def load_magic_dl1_data_files(input_dir, config): return event_data, subarray -def load_train_data_files( - input_dir, offaxis_min=None, offaxis_max=None, true_event_class=None -): - """ - Loads DL1-stereo data files and separates the shower events per - telescope combination type for training RFs. - - Parameters - ---------- - input_dir : str - Path to a directory where input DL1-stereo files are stored - offaxis_min : str, optional - Minimum shower off-axis angle allowed, whose format should be - acceptable by `astropy.units.quantity.Quantity` - offaxis_max : str, optional - Maximum shower off-axis angle allowed, whose format should be - acceptable by `astropy.units.quantity.Quantity` - true_event_class : int, optional - True event class of the input events - - Returns - ------- - dict - Data frames of the shower events separated by the telescope - combination types - - Raises - ------ - FileNotFoundError - If any DL1-stereo data files are not found in the input - directory - """ - TEL_COMBINATIONS = { - "LST1_M1": [1, 2], # combo_type = 0 - "LST1_M1_M2": [1, 2, 3], # combo_type = 1 - "LST1_M2": [1, 3], # combo_type = 2 - "M1_M2": [2, 3], # combo_type = 3 - } # TODO: REMOVE WHEN SWITCHING TO THE NEW RFs IMPLEMENTTATION (1 RF PER TELESCOPE) - - # Find the input files - file_mask = f"{input_dir}/dl1_stereo_*.h5" - - input_files = glob.glob(file_mask) - input_files.sort() - - if len(input_files) == 0: - raise FileNotFoundError( - "Could not find any DL1-stereo data files in the input directory." - ) - - # Load the input files - logger.info("\nThe following DL1-stereo data files are found:") - - data_list = [] - - for input_file in input_files: - logger.info(input_file) - - df_events = pd.read_hdf(input_file, key="events/parameters") - data_list.append(df_events) - - event_data = pd.concat(data_list) - event_data.set_index(GROUP_INDEX_TRAIN, inplace=True) - event_data.sort_index(inplace=True) - - if offaxis_min is not None: - offaxis_min = u.Quantity(offaxis_min).to_value("deg") - event_data.query(f"off_axis >= {offaxis_min}", inplace=True) - - if offaxis_max is not None: - offaxis_max = u.Quantity(offaxis_max).to_value("deg") - event_data.query(f"off_axis <= {offaxis_max}", inplace=True) - - if true_event_class is not None: - event_data["true_event_class"] = true_event_class - - event_data = get_stereo_events_old(event_data, group_index=GROUP_INDEX_TRAIN) - - data_train = {} - - # Loop over every telescope combination type - for combo_type, tel_combo in enumerate(TEL_COMBINATIONS.keys()): - df_events = event_data.query(f"combo_type == {combo_type}") - - if not df_events.empty: - data_train[tel_combo] = df_events - - return data_train - - def load_train_data_files_tel( input_dir, config, offaxis_min=None, offaxis_max=None, true_event_class=None ): @@ -823,7 +716,7 @@ def load_train_data_files_tel( input_dir : str Path to a directory where input DL1-stereo files are stored config : dict - Yaml file with information about the telescope IDs. + Dictionary with information about the telescope IDs. offaxis_min : str, optional Minimum shower off-axis angle allowed, whose format should be acceptable by `astropy.units.quantity.Quantity` @@ -898,22 +791,22 @@ def load_train_data_files_tel( return data_train -def load_mc_dl2_data_file(input_file, quality_cuts, event_type, weight_type_dl2): +def load_mc_dl2_data_file( + config, input_file, quality_cuts, event_type, weight_type_dl2 +): """ Loads a MC DL2 data file for creating the IRFs. Parameters ---------- + config : dict + Evoked from an yaml file with information about the telescope IDs. input_file : str Path to an input MC DL2 data file quality_cuts : str Quality cuts applied to the input events event_type : str - Type of the events which will be used - - "software" uses software coincident events, - "software_only_3tel" uses only 3-tel combination events, - "magic_only" uses only MAGIC-stereo combination events, and - "hardware" uses all the telescope combination events + Type of the events which will be used weight_type_dl2 : str Type of the weight for averaging telescope-wise DL2 parameters - "simple", "variance" or "intensity" are allowed @@ -921,8 +814,8 @@ def load_mc_dl2_data_file(input_file, quality_cuts, event_type, weight_type_dl2) Returns ------- event_table : astropy.table.table.QTable - Table with the MC DL2 events surviving the cuts - pointing : np.ndarray + Table of the MC DL2 events surviving the cuts + pointing : numpy.ndarray Telescope pointing direction (zd, az) in the unit of degree sim_info : pyirf.simulations.SimulatedEventsInfo Container of the simulation information @@ -932,29 +825,30 @@ def load_mc_dl2_data_file(input_file, quality_cuts, event_type, weight_type_dl2) ValueError If the input event type is not known """ + _, TEL_COMBINATIONS = telescope_combinations(config) + logger.info(TEL_COMBINATIONS) + three_or_more = [] + magic_only = None + for n, combination in enumerate(TEL_COMBINATIONS.values()): + if len(combination) >= 3: + three_or_more.append(n) + for n, combination in enumerate(TEL_COMBINATIONS.keys()): + if combination in ["MAGIC-II_MAGIC-I", "MAGIC-I_MAGIC-II"]: + magic_only = n + break # Load the input file df_events = pd.read_hdf(input_file, key="events/parameters") df_events.set_index(["obs_id", "event_id", "tel_id"], inplace=True) df_events.sort_index(inplace=True) - df_events = get_stereo_events_old(df_events, quality_cuts) + df_events = get_stereo_events( + df_events, config, quality_cuts, eval_multi_combo=False + ) logger.info(f"\nExtracting the events of the '{event_type}' type...") - if event_type == "software": - # The events of the MAGIC-stereo combination are excluded - df_events.query("(combo_type < 3) & (magic_stereo == True)", inplace=True) - - elif event_type == "software_only_3tel": - df_events.query("combo_type == 1", inplace=True) - - elif event_type == "magic_only": - df_events.query("combo_type == 3", inplace=True) - - elif event_type != "hardware": - raise ValueError(f"Unknown event type '{event_type}'.") - + df_events = query_data(df_events, event_type, magic_only, three_or_more, True) n_events = len(df_events.groupby(["obs_id", "event_id"]).size()) logger.info(f"--> {n_events} stereo events") @@ -1029,22 +923,20 @@ def load_mc_dl2_data_file(input_file, quality_cuts, event_type, weight_type_dl2) return event_table, pointing, sim_info -def load_dl2_data_file(input_file, quality_cuts, event_type, weight_type_dl2): +def load_dl2_data_file(config, input_file, quality_cuts, event_type, weight_type_dl2): """ Loads a DL2 data file for processing to DL3. Parameters ---------- + config : dict + Evoked from an yaml file with information about the telescope IDs. input_file : str Path to an input DL2 data file quality_cuts : str Quality cuts applied to the input events event_type : str - Type of the events which will be used - - "software" uses software coincident events, - "software_only_3tel" uses only 3-tel combination events, - "magic_only" uses only MAGIC-stereo combination events, and - "hardware" uses all the telescope combination events + Type of the events which will be used weight_type_dl2 : str Type of the weight for averaging telescope-wise DL2 parameters - "simple", "variance" or "intensity" are allowed @@ -1064,34 +956,28 @@ def load_dl2_data_file(input_file, quality_cuts, event_type, weight_type_dl2): If the input event type is not known """ + _, TEL_COMBINATIONS = telescope_combinations(config) + three_or_more = [] + magic_only = None + for n, combination in enumerate(TEL_COMBINATIONS.values()): + if len(combination) >= 3: + three_or_more.append(n) + for n, combination in enumerate(TEL_COMBINATIONS.keys()): + if combination in ["MAGIC-II_MAGIC-I", "MAGIC-I_MAGIC-II"]: + magic_only = n + break # Load the input file event_data = pd.read_hdf(input_file, key="events/parameters") event_data.set_index(["obs_id", "event_id", "tel_id"], inplace=True) event_data.sort_index(inplace=True) - event_data = get_stereo_events_old(event_data, quality_cuts) + event_data = get_stereo_events( + event_data, config, quality_cuts, eval_multi_combo=False + ) logger.info(f"\nExtracting the events of the '{event_type}' type...") - if event_type == "software": - # The events of the MAGIC-stereo combination are excluded - event_data.query("combo_type < 3", inplace=True) - - elif event_type == "software_only_3tel": - event_data.query("combo_type == 1", inplace=True) - - elif event_type == "magic_only": - event_data.query("combo_type == 3", inplace=True) - - elif event_type == "hardware": - logger.warning( - "WARNING: Please confirm that this type is correct for the input data, " - "since the hardware trigger between LST-1 and MAGIC may NOT be used." - ) - - else: - raise ValueError(f"Unknown event type '{event_type}'.") - + event_data = query_data(event_data, event_type, magic_only, three_or_more, False) n_events = len(event_data.groupby(["obs_id", "event_id"]).size()) logger.info(f"--> {n_events} stereo events") @@ -1382,7 +1268,7 @@ def save_pandas_data_in_table( Parameters ---------- - input_data : pandas.DataFrame + input_data : pandas.core.frame.DataFrame Pandas data frame output_file : str Path to an output HDF file diff --git a/magicctapipe/io/tests/test_gadf.py b/magicctapipe/io/tests/test_gadf.py index c5b31edcf..fbeddbbc9 100644 --- a/magicctapipe/io/tests/test_gadf.py +++ b/magicctapipe/io/tests/test_gadf.py @@ -95,8 +95,8 @@ def test_create_gti_hdu(event): assert gti_head["TIMESYS"] == "UTC" -def test_create_event_hdu(event): - evt_fits = create_event_hdu(event, 200 * u.s, 0.97, "Crab") +def test_create_event_hdu(event, config_gen): + evt_fits = create_event_hdu(event, config_gen, 200 * u.s, 0.97, "Crab") evt = evt_fits.data assert np.array_equal(evt["EVENT_ID"], np.array([1, 2, 3])) assert np.array_equal(evt["RA"], np.array([84.2, 83.8, 84.0])) @@ -114,9 +114,9 @@ def test_create_event_hdu(event): ) -def test_create_event_hdu_exc(event): +def test_create_event_hdu_exc(event, config_gen): with pytest.raises( ValueError, match="The input RA/Dec coordinate is set to `None`.", ): - _ = create_event_hdu(event, 200 * u.s, 0.97, "abc") + _ = create_event_hdu(event, config_gen, 200 * u.s, 0.97, "abc") diff --git a/magicctapipe/io/tests/test_io.py b/magicctapipe/io/tests/test_io.py index 80505e85d..5c8e91380 100644 --- a/magicctapipe/io/tests/test_io.py +++ b/magicctapipe/io/tests/test_io.py @@ -1,4 +1,5 @@ import glob +import logging import numpy as np import pandas as pd @@ -15,14 +16,78 @@ load_lst_dl1_data_file, load_magic_dl1_data_files, load_mc_dl2_data_file, - load_train_data_files, load_train_data_files_tel, + query_data, save_pandas_data_in_table, telescope_combinations, ) +LOGGER = logging.getLogger(__name__) + class TestGeneral: + def test_query_data(self, query_test_1, query_test_2, caplog): + + query_test = pd.read_hdf(query_test_1, key="/events/parameters") + data = query_data(query_test, "software", 3, [], True) + assert np.allclose(np.array(data["event_id"]), np.array([0, 3, 5, 10, 11])) + + query_test = pd.read_hdf(query_test_1, key="/events/parameters") + data = query_data(query_test, "software", 3, [], False) + assert np.allclose( + np.array(data["event_id"]), np.array([0, 3, 4, 5, 6, 7, 9, 10, 11]) + ) + + with caplog.at_level(logging.WARNING): + query_test = pd.read_hdf(query_test_1, key="/events/parameters") + data = query_data(query_test, "software", None, [], False) + assert ( + 'Requested event type and provided telescopes IDs are not consistent; "software" must be used in case of standard MAGIC+LST-1 analyses' + in caplog.text + ) + + query_test = pd.read_hdf(query_test_2, key="/events/parameters") + data = query_data(query_test, "trigger_3tels_or_more", None, [4, 5, 6], False) + assert np.allclose(np.array(data["event_id"]), np.array([1, 3, 9])) + + query_test = pd.read_hdf(query_test_2, key="/events/parameters") + data = query_data(query_test, "trigger_no_magic_stereo", None, [], False) + assert np.allclose( + np.array(data["event_id"]), np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + ) + + query_test = pd.read_hdf(query_test_2, key="/events/parameters") + data = query_data(query_test, "trigger_no_magic_stereo", 3, [], False) + assert np.allclose( + np.array(data["event_id"]), np.array([0, 1, 3, 4, 6, 7, 9, 10, 11]) + ) + + with caplog.at_level(logging.WARNING): + query_test = pd.read_hdf(query_test_2, key="/events/parameters") + data = query_data(query_test, "magic_only", None, [], False) + assert ( + "MAGIC-only analysis requested, but inconsistent with the provided telescope IDs: check the configuration file" + in caplog.text + ) + + query_test = pd.read_hdf(query_test_2, key="/events/parameters") + data = query_data(query_test, "magic_only", 3, [], False) + assert np.allclose(np.array(data["event_id"]), np.array([2, 5, 8])) + + query_test = pd.read_hdf(query_test_2, key="/events/parameters") + data = query_data(query_test, "hardware", None, [], False) + assert np.allclose( + np.array(data["event_id"]), np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + ) + + query_test = pd.read_hdf(query_test_2, key="/events/parameters") + event_type = "abc" + with pytest.raises( + ValueError, + match=f"Unknown event type '{event_type}'.", + ): + _ = query_data(query_test, event_type, None, [], False) + def test_check_input_list(self): """ Test on different dictionaries @@ -160,11 +225,17 @@ def test_telescope_combinations(self, config_gen, config_gen_4lst): LSTs, LSTs_comb = telescope_combinations(config_gen_4lst) assert M_LST == {1: "LST-1", 2: "MAGIC-I", 3: "MAGIC-II"} assert M_LST_comb == { - "LST-1_MAGIC-I": [1, 2], - "LST-1_MAGIC-I_MAGIC-II": [1, 2, 3], - "LST-1_MAGIC-II": [1, 3], - "MAGIC-I_MAGIC-II": [2, 3], + "LST-1_MAGIC-I": [1, 2], # combo_type = 0 + "LST-1_MAGIC-I_MAGIC-II": [1, 2, 3], # combo_type = 1 + "LST-1_MAGIC-II": [1, 3], # combo_type = 2 + "MAGIC-I_MAGIC-II": [2, 3], # combo_type = 3 } + assert list(M_LST_comb.keys()) == [ + "LST-1_MAGIC-I", + "LST-1_MAGIC-I_MAGIC-II", + "LST-1_MAGIC-II", + "MAGIC-I_MAGIC-II", + ] assert LSTs == {1: "LST-1", 3: "LST-2", 2: "LST-3", 5: "LST-4"} assert LSTs_comb == { "LST-1_LST-2": [1, 3], @@ -274,42 +345,6 @@ def test_get_stereo_events_mc_cut(self, gamma_stereo, p_stereo, config_gen): assert np.all(data["intensity"] > 50) assert len(data) > 0 - def test_load_train_data_files(self, p_stereo, gamma_stereo): - """ - Check dictionary of the combo types - """ - - for stereo in [p_stereo, gamma_stereo]: - events = load_train_data_files(str(stereo[0])) - assert list(events.keys()) == ["LST1_M1", "LST1_M1_M2", "LST1_M2", "M1_M2"] - data = events["LST1_M1"] - assert np.all(data["combo_type"] == 0) - assert "off_axis" in data.columns - assert "true_event_class" not in data.columns - - def test_load_train_data_files_off(self, gamma_stereo): - """ - Check off-axis cut - """ - events = load_train_data_files( - str(gamma_stereo[0]), offaxis_min="0.2 deg", offaxis_max="0.5 deg" - ) - data = events["LST1_M1"] - assert np.all(data["off_axis"] >= 0.2) - assert np.all(data["off_axis"] <= 0.5) - assert len(data) > 0 - - def test_load_train_data_files_exc(self, temp_train_exc): - """ - Check on exceptions - """ - - with pytest.raises( - FileNotFoundError, - match="Could not find any DL1-stereo data files in the input directory.", - ): - _ = load_train_data_files(str(temp_train_exc)) - def test_load_train_data_files_tel(self, p_stereo, gamma_stereo, config_gen): """ Check dictionary @@ -345,7 +380,7 @@ def test_load_train_data_files_tel_exc(self, temp_train_exc, config_gen): FileNotFoundError, match="Could not find any DL1-stereo data files in the input directory.", ): - _ = load_train_data_files(str(temp_train_exc), config_gen) + _ = load_train_data_files_tel(str(temp_train_exc), config_gen) @pytest.mark.dependency(depends=["test_exist_dl1_stereo_mc"]) @@ -354,7 +389,7 @@ def test_exist_rf(RF): Check if RFs produced """ - assert len(glob.glob(f"{RF}/*")) == 12 + assert len(glob.glob(f"{RF}/*")) == 9 @pytest.mark.dependency(depends=["test_exist_rf"]) @@ -369,14 +404,14 @@ def test_exist_dl2_mc(p_dl2, gamma_dl2): @pytest.mark.dependency(depends=["test_exist_dl2_mc"]) class TestDL2MC: - def test_load_mc_dl2_data_file(self, p_dl2, gamma_dl2): + def test_load_mc_dl2_data_file(self, config_gen, p_dl2, gamma_dl2): """ Checks on default loading """ dl2_mc = [p for p in gamma_dl2.glob("*")] + [p for p in p_dl2.glob("*")] for file in dl2_mc: data, point, _ = load_mc_dl2_data_file( - str(file), "width>0", "software", "simple" + config_gen, str(file), "width>0", "software", "simple" ) assert "pointing_alt" in data.colnames assert "theta" in data.colnames @@ -385,31 +420,31 @@ def test_load_mc_dl2_data_file(self, p_dl2, gamma_dl2): assert point[0] >= 0 assert point[0] <= 90 - def test_load_mc_dl2_data_file_cut(self, p_dl2, gamma_dl2): + def test_load_mc_dl2_data_file_cut(self, config_gen, p_dl2, gamma_dl2): """ Check on quality cuts """ dl2_mc = [p for p in gamma_dl2.glob("*")] + [p for p in p_dl2.glob("*")] for file in dl2_mc: data, _, _ = load_mc_dl2_data_file( - str(file), "gammaness>0.1", "software", "simple" + config_gen, str(file), "gammaness>0.1", "software", "simple" ) assert np.all(data["gammaness"] > 0.1) assert len(data) > 0 - def test_load_mc_dl2_data_file_opt(self, p_dl2, gamma_dl2): + def test_load_mc_dl2_data_file_opt(self, config_gen, p_dl2, gamma_dl2): """ Check on event_type """ dl2_mc = [p for p in gamma_dl2.glob("*")] + [p for p in p_dl2.glob("*")] for file in dl2_mc: data_s, _, _ = load_mc_dl2_data_file( - str(file), "width>0", "software", "simple" + config_gen, str(file), "width>0", "software", "simple" ) assert np.all(data_s["combo_type"] < 3) assert len(data_s) > 0 - def test_load_mc_dl2_data_file_exc(self, p_dl2, gamma_dl2): + def test_load_mc_dl2_data_file_exc(self, config_gen, p_dl2, gamma_dl2): """ Check on event_type exceptions """ @@ -421,7 +456,7 @@ def test_load_mc_dl2_data_file_exc(self, p_dl2, gamma_dl2): match=f"Unknown event type '{event_type}'.", ): _, _, _ = load_mc_dl2_data_file( - str(file), "width>0", event_type, "simple" + config_gen, str(file), "width>0", event_type, "simple" ) def test_get_dl2_mean_mc(self, p_dl2, gamma_dl2): @@ -694,13 +729,13 @@ def test_exist_dl2(real_dl2): @pytest.mark.dependency(depends=["test_exist_dl2"]) class TestDL2Data: - def test_load_dl2_data_file(self, real_dl2): + def test_load_dl2_data_file(self, config_gen, real_dl2): """ Checks on default loading """ for file in real_dl2.glob("*"): data, on, dead = load_dl2_data_file( - str(file), "width>0", "software", "simple" + config_gen, str(file), "width>0", "software", "simple" ) assert "pointing_alt" in data.colnames assert "timestamp" in data.colnames @@ -709,29 +744,29 @@ def test_load_dl2_data_file(self, real_dl2): assert on > 0 assert dead > 0 - def test_load_dl2_data_file_cut(self, real_dl2): + def test_load_dl2_data_file_cut(self, config_gen, real_dl2): """ Check on quality cuts """ for file in real_dl2.glob("*"): data, _, _ = load_dl2_data_file( - str(file), "gammaness<0.9", "software", "simple" + config_gen, str(file), "gammaness<0.9", "software", "simple" ) assert np.all(data["gammaness"] < 0.9) assert len(data) > 0 - def test_load_dl2_data_file_opt(self, real_dl2): + def test_load_dl2_data_file_opt(self, config_gen, real_dl2): """ Check on event_type """ for file in real_dl2.glob("*"): data_s, _, _ = load_dl2_data_file( - str(file), "width>0", "software", "simple" + config_gen, str(file), "width>0", "software", "simple" ) assert np.all(data_s["combo_type"] < 3) assert len(data_s) > 0 - def test_load_dl2_data_file_exc(self, real_dl2): + def test_load_dl2_data_file_exc(self, config_gen, real_dl2): """ Check on event_type exceptions """ @@ -741,7 +776,9 @@ def test_load_dl2_data_file_exc(self, real_dl2): ValueError, match=f"Unknown event type '{event_type}'.", ): - _, _, _ = load_dl2_data_file(str(file), "width>0", event_type, "simple") + _, _, _ = load_dl2_data_file( + config_gen, str(file), "width>0", event_type, "simple" + ) def test_get_dl2_mean_real(self, real_dl2): """ diff --git a/magicctapipe/io/tests/test_io_monly.py b/magicctapipe/io/tests/test_io_monly.py index c9a0d9825..e450d7100 100644 --- a/magicctapipe/io/tests/test_io_monly.py +++ b/magicctapipe/io/tests/test_io_monly.py @@ -11,7 +11,6 @@ load_irf_files, load_magic_dl1_data_files, load_mc_dl2_data_file, - load_train_data_files, load_train_data_files_tel, ) @@ -80,31 +79,6 @@ def test_get_stereo_events_mc_cut( assert np.all(data["intensity"] > 50) assert len(data) > 0 - def test_load_train_data_files(self, p_stereo_monly, gamma_stereo_monly): - """ - Check dictionary - """ - - for stereo in [p_stereo_monly, gamma_stereo_monly]: - events = load_train_data_files(str(stereo[0])) - assert list(events.keys()) == ["M1_M2"] - data = events["M1_M2"] - assert np.all(data["combo_type"] == 3) - assert "off_axis" in data.columns - assert "true_event_class" not in data.columns - - def test_load_train_data_files_off(self, gamma_stereo_monly): - """ - Check off-axis cut - """ - events = load_train_data_files( - str(gamma_stereo_monly[0]), offaxis_min="0.2 deg", offaxis_max="0.5 deg" - ) - data = events["M1_M2"] - assert np.all(data["off_axis"] >= 0.2) - assert np.all(data["off_axis"] <= 0.5) - assert len(data) > 0 - def test_load_train_data_files_tel( self, p_stereo_monly, gamma_stereo_monly, config_gen ): @@ -141,7 +115,7 @@ def test_exist_rf(RF_monly): Check if RFs produced """ - assert len(glob.glob(f"{RF_monly}/*")) == 3 + assert len(glob.glob(f"{RF_monly}/*")) == 6 @pytest.mark.dependency(depends=["test_exist_rf"]) @@ -156,7 +130,7 @@ def test_exist_dl2_mc(p_dl2_monly, gamma_dl2_monly): @pytest.mark.dependency(depends=["test_exist_dl2_mc"]) class TestDL2MC: - def test_load_mc_dl2_data_file(self, p_dl2_monly, gamma_dl2_monly): + def test_load_mc_dl2_data_file(self, config_gen, p_dl2_monly, gamma_dl2_monly): """ Checks on default loading """ @@ -165,7 +139,7 @@ def test_load_mc_dl2_data_file(self, p_dl2_monly, gamma_dl2_monly): ] for file in dl2_mc: data, point, _ = load_mc_dl2_data_file( - str(file), "width>0", "magic_only", "simple" + config_gen, str(file), "width>0", "magic_only", "simple" ) assert "pointing_alt" in data.colnames assert "theta" in data.colnames @@ -174,7 +148,7 @@ def test_load_mc_dl2_data_file(self, p_dl2_monly, gamma_dl2_monly): assert point[0] >= 0 assert point[0] <= 90 - def test_load_mc_dl2_data_file_cut(self, p_dl2_monly, gamma_dl2_monly): + def test_load_mc_dl2_data_file_cut(self, config_gen, p_dl2_monly, gamma_dl2_monly): """ Check on quality cuts """ @@ -183,12 +157,12 @@ def test_load_mc_dl2_data_file_cut(self, p_dl2_monly, gamma_dl2_monly): ] for file in dl2_mc: data, _, _ = load_mc_dl2_data_file( - str(file), "gammaness>0.1", "magic_only", "simple" + config_gen, str(file), "gammaness>0.1", "magic_only", "simple" ) assert np.all(data["gammaness"] > 0.1) assert len(data) > 0 - def test_load_mc_dl2_data_file_opt(self, p_dl2_monly, gamma_dl2_monly): + def test_load_mc_dl2_data_file_opt(self, config_gen, p_dl2_monly, gamma_dl2_monly): """ Check on event_type """ @@ -198,13 +172,13 @@ def test_load_mc_dl2_data_file_opt(self, p_dl2_monly, gamma_dl2_monly): for file in dl2_mc: data_m, _, _ = load_mc_dl2_data_file( - str(file), "width>0", "magic_only", "simple" + config_gen, str(file), "width>0", "magic_only", "simple" ) assert np.all(data_m["combo_type"] == 3) assert len(data_m) > 0 - def test_load_mc_dl2_data_file_exc(self, p_dl2_monly, gamma_dl2_monly): + def test_load_mc_dl2_data_file_exc(self, config_gen, p_dl2_monly, gamma_dl2_monly): """ Check on event_type exceptions """ @@ -218,7 +192,7 @@ def test_load_mc_dl2_data_file_exc(self, p_dl2_monly, gamma_dl2_monly): match=f"Unknown event type '{event_type}'.", ): _, _, _ = load_mc_dl2_data_file( - str(file), "width>0", event_type, "simple" + config_gen, str(file), "width>0", event_type, "simple" ) def test_get_dl2_mean_mc(self, p_dl2_monly, gamma_dl2_monly): @@ -410,13 +384,13 @@ def test_exist_dl2(real_dl2_monly): @pytest.mark.dependency(depends=["test_exist_dl2"]) class TestDL2Data: - def test_load_dl2_data_file(self, real_dl2_monly): + def test_load_dl2_data_file(self, config_gen, real_dl2_monly): """ Checks on default loading """ for file in real_dl2_monly.glob("*"): data, on, dead = load_dl2_data_file( - str(file), "width>0", "magic_only", "simple" + config_gen, str(file), "width>0", "magic_only", "simple" ) assert "pointing_alt" in data.colnames assert "timestamp" in data.colnames @@ -425,31 +399,31 @@ def test_load_dl2_data_file(self, real_dl2_monly): assert on > 0 assert dead > 0 - def test_load_dl2_data_file_cut(self, real_dl2_monly): + def test_load_dl2_data_file_cut(self, config_gen, real_dl2_monly): """ Check on quality cuts """ for file in real_dl2_monly.glob("*"): data, _, _ = load_dl2_data_file( - str(file), "gammaness<0.9", "magic_only", "simple" + config_gen, str(file), "gammaness<0.9", "magic_only", "simple" ) assert np.all(data["gammaness"] < 0.9) assert len(data) > 0 - def test_load_dl2_data_file_opt(self, real_dl2_monly): + def test_load_dl2_data_file_opt(self, config_gen, real_dl2_monly): """ Check on event_type """ for file in real_dl2_monly.glob("*"): data_m, _, _ = load_dl2_data_file( - str(file), "width>0", "magic_only", "simple" + config_gen, str(file), "width>0", "magic_only", "simple" ) assert np.all(data_m["combo_type"] == 3) assert len(data_m) > 0 - def test_load_dl2_data_file_exc(self, real_dl2_monly): + def test_load_dl2_data_file_exc(self, config_gen, real_dl2_monly): """ Check on event_type exceptions """ @@ -459,7 +433,9 @@ def test_load_dl2_data_file_exc(self, real_dl2_monly): ValueError, match=f"Unknown event type '{event_type}'.", ): - _, _, _ = load_dl2_data_file(str(file), "width>0", event_type, "simple") + _, _, _ = load_dl2_data_file( + config_gen, str(file), "width>0", event_type, "simple" + ) def test_get_dl2_mean_real(self, real_dl2_monly): """ diff --git a/magicctapipe/reco/estimators.py b/magicctapipe/reco/estimators.py index c0b2de11e..c25979f2c 100644 --- a/magicctapipe/reco/estimators.py +++ b/magicctapipe/reco/estimators.py @@ -13,11 +13,6 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) -TEL_NAMES = { - 1: "LST-1", - 2: "MAGIC-I", - 3: "MAGIC-II", -} # TODO: REMOVE WHEN SWITCHING TO THE NEW RFs IMPLEMENTTATION (1 RF PER TELESCOPE) class EnergyRegressor: @@ -26,6 +21,8 @@ class EnergyRegressor: Parameters ---------- + TEL_NAMES : dict + Dictionary with telescope IDs and names settings : dict Settings of RF regressors features : list @@ -34,12 +31,14 @@ class EnergyRegressor: If `True`, it trains RFs with unsigned features """ - def __init__(self, settings={}, features=[], use_unsigned_features=None): + def __init__(self, TEL_NAMES, settings={}, features=[], use_unsigned_features=None): """ Constructor of the class. Parameters ---------- + TEL_NAMES : dict + Dictionary with telescope IDs and names. settings : dict Settings of RF regressors features : list @@ -48,6 +47,7 @@ def __init__(self, settings={}, features=[], use_unsigned_features=None): If `True`, it trains RFs with unsigned features """ + self.TEL_NAMES = TEL_NAMES self.settings = settings self.features = features self.use_unsigned_features = use_unsigned_features @@ -84,7 +84,7 @@ def fit(self, event_data): regressor = sklearn.ensemble.RandomForestRegressor(**self.settings) # Train a telescope RF - logger.info(f"Training a {TEL_NAMES[tel_id]} RF...") + logger.info(f"Training a {self.TEL_NAMES[tel_id]} RF...") regressor.fit(x_train, y_train) self.telescope_rfs[tel_id] = regressor @@ -187,6 +187,8 @@ class DispRegressor: Parameters ---------- + TEL_NAMES : dict + Dictionary with telescope IDs and names settings : dict Settings of RF regressors features : list @@ -195,12 +197,14 @@ class DispRegressor: If `True`, it trains RFs with unsigned features """ - def __init__(self, settings={}, features=[], use_unsigned_features=None): + def __init__(self, TEL_NAMES, settings={}, features=[], use_unsigned_features=None): """ Constructor of the class. Parameters ---------- + TEL_NAMES : dict + Dictionary with telescope IDs and names settings : dict Settings of RF regressors features : list @@ -209,6 +213,7 @@ def __init__(self, settings={}, features=[], use_unsigned_features=None): If `True`, it trains RFs with unsigned features """ + self.TEL_NAMES = TEL_NAMES self.settings = settings self.features = features self.use_unsigned_features = use_unsigned_features @@ -244,7 +249,7 @@ def fit(self, event_data): regressor = sklearn.ensemble.RandomForestRegressor(**self.settings) # Train a telescope RF - logger.info(f"Training a {TEL_NAMES[tel_id]} RF...") + logger.info(f"Training a {self.TEL_NAMES[tel_id]} RF...") regressor.fit(x_train, y_train) self.telescope_rfs[tel_id] = regressor @@ -345,6 +350,8 @@ class EventClassifier: Parameters ---------- + TEL_NAMES : dict + Dictionary with telescope IDs and names settings : dict Settings of RF classifiers features : list @@ -353,12 +360,14 @@ class EventClassifier: If `True`, it trains RFs with unsigned features """ - def __init__(self, settings={}, features=[], use_unsigned_features=None): + def __init__(self, TEL_NAMES, settings={}, features=[], use_unsigned_features=None): """ Constructor of the class. Parameters ---------- + TEL_NAMES : dict + Dictionary with telescope IDs and names settings : dict Settings of RF classifiers features : list @@ -367,6 +376,7 @@ def __init__(self, settings={}, features=[], use_unsigned_features=None): If `True`, it trains RFs with unsigned features """ + self.TEL_NAMES = TEL_NAMES self.settings = settings self.features = features self.use_unsigned_features = use_unsigned_features @@ -402,7 +412,7 @@ def fit(self, event_data): classifier = sklearn.ensemble.RandomForestClassifier(**self.settings) # Train a telescope RF - logger.info(f"Training a {TEL_NAMES[tel_id]} RF...") + logger.info(f"Training a {self.TEL_NAMES[tel_id]} RF...") classifier.fit(x_train, y_train) self.telescope_rfs[tel_id] = classifier diff --git a/magicctapipe/resources/test_config.yaml b/magicctapipe/resources/test_config.yaml index 04b04a732..fd4f4d863 100644 --- a/magicctapipe/resources/test_config.yaml +++ b/magicctapipe/resources/test_config.yaml @@ -196,7 +196,7 @@ event_classifier: create_irf: quality_cuts: "disp_diff_mean < 0.22" - event_type: "software" # select "software", "software_only_3tel", "magic_only" or "hardware" + event_type: "software" # select "software", "trigger_3tels_or_more", "trigger_no_magic_stereo", "magic_only" or "hardware" weight_type_dl2: "simple" # select "simple", "variance" or "intensity" obs_time_irf: "50 h" # used when creating a background HDU diff --git a/magicctapipe/resources/test_config_monly.yaml b/magicctapipe/resources/test_config_monly.yaml index 8fba5ac2a..384bc9d6f 100644 --- a/magicctapipe/resources/test_config_monly.yaml +++ b/magicctapipe/resources/test_config_monly.yaml @@ -197,7 +197,7 @@ event_classifier: create_irf: quality_cuts: "disp_diff_mean < 0.22" - event_type: "magic_only" # select "software", "software_only_3tel", "magic_only" or "hardware" + event_type: "magic_only" # select "software", "trigger_3tels_or_more", "trigger_no_magic_stereo", "magic_only" or "hardware" weight_type_dl2: "simple" # select "simple", "variance" or "intensity" obs_time_irf: "50 h" # used when creating a background HDU diff --git a/magicctapipe/scripts/lst1_magic/lst1_magic_create_irf.py b/magicctapipe/scripts/lst1_magic/lst1_magic_create_irf.py index 25e81920f..0b6d71faf 100644 --- a/magicctapipe/scripts/lst1_magic/lst1_magic_create_irf.py +++ b/magicctapipe/scripts/lst1_magic/lst1_magic_create_irf.py @@ -17,16 +17,12 @@ IRFs, which allows us to perform the 1D spectral analysis even if only diffuse data is available for test MCs. -There are four different event types with which the IRFs are created. -The "hardware" type is supposed for the hardware trigger between LST-1 -and MAGIC, allowing for the events of all the telescope combinations. -The "software(_only_3tel)" types are supposed for the software event -coincidence with LST-mono and MAGIC-stereo observations, allowing for -only the events triggering both M1 and M2. The "software" type allows -for the events of the any 2-tel combinations except the MAGIC-stereo -combination at the moment. The "software_only_3tel" type allows for only -the events of the 3-tel combination. The "magic_only" type allows for -only the events of the MAGIC-stereo combination. +There are five different event types with which the IRFs are created: +- 'software': Stadard MAGIC+LST-1 case; remove magic-only events +- 'trigger_3tels_or_more': at least three telescopes triggered +- 'trigger_no_magic_stereo': no need for both MAGIC to trigger, to be used if more than one LST is used; remove MAGIC-only data if they exist +- 'magic_only': only M1_M2 events selected +- 'hardware': hardware trigger There are two types of gammaness and theta cuts, "global" and "dynamic". In case of the dynamic cuts, the optimal cut satisfying a given @@ -124,7 +120,7 @@ def create_irf( logger.info(f"\nInput gamma MC DL2 data file: {input_file_gamma}") event_table_gamma, pnt_gamma, sim_info_gamma = load_mc_dl2_data_file( - input_file_gamma, quality_cuts, event_type, weight_type_dl2 + config, input_file_gamma, quality_cuts, event_type, weight_type_dl2 ) viewcone = sim_info_gamma.viewcone_max.to_value( "deg" @@ -199,7 +195,7 @@ def create_irf( logger.info(f"\nInput proton MC DL2 data file: {input_file_proton}") event_table_proton, pnt_proton, sim_info_proton = load_mc_dl2_data_file( - input_file_proton, quality_cuts, event_type, weight_type_dl2 + config, input_file_proton, quality_cuts, event_type, weight_type_dl2 ) if any(pnt_proton != pnt_gamma): @@ -212,7 +208,7 @@ def create_irf( logger.info(f"\nInput electron MC DL2 data file: {input_file_electron}") event_table_electron, pnt_electron, sim_info_electron = load_mc_dl2_data_file( - input_file_electron, quality_cuts, event_type, weight_type_dl2 + config, input_file_electron, quality_cuts, event_type, weight_type_dl2 ) if any(pnt_electron != pnt_gamma): diff --git a/magicctapipe/scripts/lst1_magic/lst1_magic_dl1_stereo_to_dl2.py b/magicctapipe/scripts/lst1_magic/lst1_magic_dl1_stereo_to_dl2.py index aa03fddc2..cd2551991 100644 --- a/magicctapipe/scripts/lst1_magic/lst1_magic_dl1_stereo_to_dl2.py +++ b/magicctapipe/scripts/lst1_magic/lst1_magic_dl1_stereo_to_dl2.py @@ -25,12 +25,17 @@ import numpy as np import pandas as pd +import yaml from astropy import units as u from astropy.coordinates import AltAz, SkyCoord, angular_separation from ctapipe.coordinates import TelescopeFrame from ctapipe.instrument import SubarrayDescription -from magicctapipe.io import get_stereo_events_old, save_pandas_data_in_table +from magicctapipe.io import ( + get_stereo_events, + save_pandas_data_in_table, + telescope_combinations, +) from magicctapipe.reco import DispRegressor, EnergyRegressor, EventClassifier __all__ = ["apply_rfs", "reconstruct_arrival_direction", "dl1_stereo_to_dl2"] @@ -38,15 +43,9 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) -TEL_COMBINATIONS = { - "LST1_M1": [1, 2], # combo_type = 0 - "LST1_M1_M2": [1, 2, 3], # combo_type = 1 - "LST1_M2": [1, 3], # combo_type = 2 - "M1_M2": [2, 3], # combo_type = 3 -} # TODO: REMOVE WHEN SWITCHING TO THE NEW RFs IMPLEMENTTATION (1 RF PER TELESCOPE) -def apply_rfs(event_data, estimator): +def apply_rfs(event_data, estimator, config): """ Applies trained RFs to DL1-stereo events, whose telescope combination type is same as the RFs. @@ -57,6 +56,8 @@ def apply_rfs(event_data, estimator): Data frame of shower events estimator : magicctapipe.reco.estimator Trained regressor or classifier + config : dict + Evoked from an yaml file with information about the telescope IDs. Returns ------- @@ -66,9 +67,8 @@ def apply_rfs(event_data, estimator): tel_ids = list(estimator.telescope_rfs.keys()) - # Extract the events of the same telescope combination type - combo_type = list(TEL_COMBINATIONS.values()).index(tel_ids) - df_events = event_data.query(f"combo_type == {combo_type}") + # Extract the events with the same telescope ID + df_events = event_data.query(f"tel_id == {tel_ids[0]}") # Apply the RFs reco_params = estimator.predict(df_events) @@ -76,7 +76,7 @@ def apply_rfs(event_data, estimator): return reco_params -def reconstruct_arrival_direction(event_data, tel_descriptions): +def reconstruct_arrival_direction(event_data, tel_descriptions, config): """ Reconstructs the arrival directions of shower events with the MARS-like DISP method. @@ -87,6 +87,8 @@ def reconstruct_arrival_direction(event_data, tel_descriptions): Data frame of shower events tel_descriptions : dict Telescope descriptions + config : dict + Dictionary with telescope IDs information Returns ------- @@ -95,6 +97,7 @@ def reconstruct_arrival_direction(event_data, tel_descriptions): """ params_with_flips = pd.DataFrame() + _, TEL_COMBINATIONS = telescope_combinations(config) # First of all, we reconstruct the directions of all the head and # tail candidates for every telescope image, i.e., the directions @@ -247,7 +250,7 @@ def reconstruct_arrival_direction(event_data, tel_descriptions): return reco_params -def dl1_stereo_to_dl2(input_file_dl1, input_dir_rfs, output_dir): +def dl1_stereo_to_dl2(input_file_dl1, input_dir_rfs, output_dir, config): """ Processes DL1-stereo events and reconstructs the DL2 parameters with trained RFs. @@ -260,8 +263,11 @@ def dl1_stereo_to_dl2(input_file_dl1, input_dir_rfs, output_dir): Path to a directory where trained RFs are stored output_dir : str Path to a directory where to save an output DL2 data file + config : dict + Dictionary with telescope IDs information """ + TEL_NAMES, _ = telescope_combinations(config) # Load the input DL1-stereo data file logger.info(f"\nInput DL1-stereo data file: {input_file_dl1}") @@ -272,7 +278,7 @@ def dl1_stereo_to_dl2(input_file_dl1, input_dir_rfs, output_dir): is_simulation = "true_energy" in event_data.columns logger.info(f"\nIs simulation: {is_simulation}") - event_data = get_stereo_events_old(event_data) + event_data = get_stereo_events(event_data, config) subarray = SubarrayDescription.from_hdf(input_file_dl1) tel_descriptions = subarray.tel @@ -295,11 +301,11 @@ def dl1_stereo_to_dl2(input_file_dl1, input_dir_rfs, output_dir): for input_file_energy in input_files_energy: logger.info(f"Applying {input_file_energy}...") - energy_regressor = EnergyRegressor() + energy_regressor = EnergyRegressor(TEL_NAMES) energy_regressor.load(input_file_energy) # Apply the RFs - reco_params = apply_rfs(event_data, energy_regressor) + reco_params = apply_rfs(event_data, energy_regressor, config) event_data.loc[reco_params.index, reco_params.columns] = reco_params del energy_regressor @@ -316,17 +322,19 @@ def dl1_stereo_to_dl2(input_file_dl1, input_dir_rfs, output_dir): for input_file_disp in input_files_dips: logger.info(f"Applying {input_file_disp}...") - disp_regressor = DispRegressor() + disp_regressor = DispRegressor(TEL_NAMES) disp_regressor.load(input_file_disp) # Apply the RFs - reco_params = apply_rfs(event_data, disp_regressor) + reco_params = apply_rfs(event_data, disp_regressor, config) event_data.loc[reco_params.index, reco_params.columns] = reco_params # Reconstruct the arrival directions with the DISP method logger.info("\nReconstructing the arrival directions...") - reco_params = reconstruct_arrival_direction(event_data, tel_descriptions) + reco_params = reconstruct_arrival_direction( + event_data, tel_descriptions, config + ) event_data.loc[reco_params.index, reco_params.columns] = reco_params del disp_regressor @@ -343,11 +351,11 @@ def dl1_stereo_to_dl2(input_file_dl1, input_dir_rfs, output_dir): for input_file_class in input_files_class: logger.info(f"Applying {input_file_class}...") - event_classifier = EventClassifier() + event_classifier = EventClassifier(TEL_NAMES) event_classifier.load(input_file_class) # Apply the RFs - reco_params = apply_rfs(event_data, event_classifier) + reco_params = apply_rfs(event_data, event_classifier, config) event_data.loc[reco_params.index, reco_params.columns] = reco_params del event_classifier @@ -396,7 +404,7 @@ def dl1_stereo_to_dl2(input_file_dl1, input_dir_rfs, output_dir): def main(): - """Main function.""" + """Main function""" start_time = time.time() parser = argparse.ArgumentParser() @@ -428,10 +436,20 @@ def main(): help="Path to a directory where to save an output DL2 data file", ) - args = parser.parse_args() + parser.add_argument( + "--config-file", + "-c", + dest="config_file", + type=str, + default="./config_general.yaml", + help="Path to a configuration file", + ) + args = parser.parse_args() + with open(args.config_file, "rb") as f: + config = yaml.safe_load(f) # Process the input data - dl1_stereo_to_dl2(args.input_file_dl1, args.input_dir_rfs, args.output_dir) + dl1_stereo_to_dl2(args.input_file_dl1, args.input_dir_rfs, args.output_dir, config) logger.info("\nDone.") diff --git a/magicctapipe/scripts/lst1_magic/lst1_magic_dl2_to_dl3.py b/magicctapipe/scripts/lst1_magic/lst1_magic_dl2_to_dl3.py index 3a3168409..7ac21ca04 100644 --- a/magicctapipe/scripts/lst1_magic/lst1_magic_dl2_to_dl3.py +++ b/magicctapipe/scripts/lst1_magic/lst1_magic_dl2_to_dl3.py @@ -99,7 +99,7 @@ def dl2_to_dl3(input_file_dl2, input_dir_irf, output_dir, config): dl2_weight_type = extra_header["DL2_WEIG"] event_table, on_time, deadc = load_dl2_data_file( - input_file_dl2, quality_cuts, event_type, dl2_weight_type + config, input_file_dl2, quality_cuts, event_type, dl2_weight_type ) # Calculate the mean pointing direction for the target point of the @@ -370,7 +370,7 @@ def dl2_to_dl3(input_file_dl2, input_dir_irf, output_dir, config): # Create an event HDU logger.info("\nCreating an event HDU...") - event_hdu = create_event_hdu(event_table, on_time, deadc, **config_dl3) + event_hdu = create_event_hdu(event_table, config, on_time, deadc, **config_dl3) hdus.append(event_hdu) diff --git a/magicctapipe/scripts/lst1_magic/lst1_magic_train_rfs.py b/magicctapipe/scripts/lst1_magic/lst1_magic_train_rfs.py index 7fcfdefe9..40a6133a0 100644 --- a/magicctapipe/scripts/lst1_magic/lst1_magic_train_rfs.py +++ b/magicctapipe/scripts/lst1_magic/lst1_magic_train_rfs.py @@ -41,7 +41,11 @@ import pandas as pd import yaml -from magicctapipe.io import format_object, load_train_data_files +from magicctapipe.io import ( + format_object, + load_train_data_files_tel, + telescope_combinations, +) from magicctapipe.io.io import GROUP_INDEX_TRAIN from magicctapipe.reco import DispRegressor, EnergyRegressor, EventClassifier @@ -55,12 +59,6 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) -TEL_NAMES = { - 1: "LST-1", - 2: "MAGIC-I", - 3: "MAGIC-II", -} # TODO: REMOVE WHEN SWITCHING TO THE NEW RFs IMPLEMENTTATION (1 RF PER TELESCOPE) - # True event class of gamma and proton MCs EVENT_CLASS_GAMMA = 0 @@ -114,12 +112,13 @@ def train_energy_regressor(input_dir, output_dir, config, use_unsigned_features= output_dir : str Path to a directory where to save trained RFs config : dict - Configuration for the LST-1 + MAGIC analysis + Configuration for the LST + MAGIC analysis use_unsigned_features : bool If `True`, it uses unsigned features for training RFs """ config_rf = config["energy_regressor"] + TEL_NAMES, _ = telescope_combinations(config) gamma_offaxis = config_rf["gamma_offaxis"] @@ -129,10 +128,9 @@ def train_energy_regressor(input_dir, output_dir, config, use_unsigned_features= # Load the input files logger.info(f"\nInput directory: {input_dir}") - event_data_train = load_train_data_files( - input_dir, gamma_offaxis["min"], gamma_offaxis["max"] + event_data_train = load_train_data_files_tel( + input_dir, config, gamma_offaxis["min"], gamma_offaxis["max"] ) - # Configure the energy regressor logger.info("\nRF regressors:") logger.info(format_object(config_rf["settings"])) @@ -141,34 +139,35 @@ def train_energy_regressor(input_dir, output_dir, config, use_unsigned_features= logger.info(format_object(config_rf["features"])) logger.info(f"\nUse unsigned features: {use_unsigned_features}") - + logger.info(f"\nconfiguration file: {config}") + logger.info(f'\nmc_tel_ids: {config["mc_tel_ids"]}') energy_regressor = EnergyRegressor( - config_rf["settings"], config_rf["features"], use_unsigned_features + TEL_NAMES, config_rf["settings"], config_rf["features"], use_unsigned_features ) # Create the output directory Path(output_dir).mkdir(exist_ok=True, parents=True) # Loop over every telescope combination type - for tel_combo, df_train in event_data_train.items(): - logger.info(f"\nEnergy regressors for the '{tel_combo}' type:") + for tel_id, df_train in event_data_train.items(): + logger.info(f"\nEnergy regressors for the telescope ID '{tel_id}' :") # Train the RFs energy_regressor.fit(df_train) # Check the feature importance - for tel_id, telescope_rf in energy_regressor.telescope_rfs.items(): - importances = telescope_rf.feature_importances_.round(5) - importances = dict(zip(energy_regressor.features, importances)) + telescope_rf = energy_regressor.telescope_rfs[tel_id] - logger.info(f"\n{TEL_NAMES[tel_id]} feature importance:") - logger.info(format_object(importances)) + importances = telescope_rf.feature_importances_.round(5) + importances = dict(zip(energy_regressor.features, importances)) + logger.info(f"\n{TEL_NAMES[tel_id]} feature importance:") + logger.info(format_object(importances)) # Save the trained RFs if use_unsigned_features: - output_file = f"{output_dir}/energy_regressors_{tel_combo}_unsigned.joblib" + output_file = f"{output_dir}/energy_regressors_{tel_id}_unsigned.joblib" else: - output_file = f"{output_dir}/energy_regressors_{tel_combo}.joblib" + output_file = f"{output_dir}/energy_regressors_{tel_id}.joblib" energy_regressor.save(output_file) @@ -192,6 +191,7 @@ def train_disp_regressor(input_dir, output_dir, config, use_unsigned_features=Fa """ config_rf = config["disp_regressor"] + TEL_NAMES, _ = telescope_combinations(config) gamma_offaxis = config_rf["gamma_offaxis"] @@ -201,8 +201,8 @@ def train_disp_regressor(input_dir, output_dir, config, use_unsigned_features=Fa # Load the input files logger.info(f"\nInput directory: {input_dir}") - event_data_train = load_train_data_files( - input_dir, gamma_offaxis["min"], gamma_offaxis["max"] + event_data_train = load_train_data_files_tel( + input_dir, config, gamma_offaxis["min"], gamma_offaxis["max"] ) # Configure the DISP regressor @@ -215,33 +215,32 @@ def train_disp_regressor(input_dir, output_dir, config, use_unsigned_features=Fa logger.info(f"\nUse unsigned features: {use_unsigned_features}") disp_regressor = DispRegressor( - config_rf["settings"], config_rf["features"], use_unsigned_features + TEL_NAMES, config_rf["settings"], config_rf["features"], use_unsigned_features ) # Create the output directory Path(output_dir).mkdir(exist_ok=True, parents=True) # Loop over every telescope combination type - for tel_combo, df_train in event_data_train.items(): - logger.info(f"\nDISP regressors for the '{tel_combo}' type:") + for tel_id, df_train in event_data_train.items(): + logger.info(f"\nDISP regressors for the telescope ID '{tel_id}':") # Train the RFs disp_regressor.fit(df_train) # Check the feature importance - for tel_id, telescope_rf in disp_regressor.telescope_rfs.items(): - importances = telescope_rf.feature_importances_.round(5) - importances = dict(zip(disp_regressor.features, importances)) + telescope_rf = disp_regressor.telescope_rfs[tel_id] - logger.info(f"\n{TEL_NAMES[tel_id]} feature importance:") - logger.info(format_object(importances)) + importances = telescope_rf.feature_importances_.round(5) + importances = dict(zip(disp_regressor.features, importances)) + logger.info(f"\n{TEL_NAMES[tel_id]} feature importance:") + logger.info(format_object(importances)) # Save the trained RFs to an output file if use_unsigned_features: - output_file = f"{output_dir}/disp_regressors_{tel_combo}_unsigned.joblib" + output_file = f"{output_dir}/disp_regressors_{tel_id}_unsigned.joblib" else: - output_file = f"{output_dir}/disp_regressors_{tel_combo}.joblib" - + output_file = f"{output_dir}/disp_regressors_{tel_id}.joblib" disp_regressor.save(output_file) logger.info(f"\nOutput file: {output_file}") @@ -268,6 +267,7 @@ def train_event_classifier( """ config_rf = config["event_classifier"] + TEL_NAMES, _ = telescope_combinations(config) gamma_offaxis = config_rf["gamma_offaxis"] @@ -277,15 +277,19 @@ def train_event_classifier( # Load the input gamma MC data files logger.info(f"\nInput gamma MC directory: {input_dir_gamma}") - event_data_gamma = load_train_data_files( - input_dir_gamma, gamma_offaxis["min"], gamma_offaxis["max"], EVENT_CLASS_GAMMA + event_data_gamma = load_train_data_files_tel( + input_dir_gamma, + config, + gamma_offaxis["min"], + gamma_offaxis["max"], + EVENT_CLASS_GAMMA, ) # Load the input proton MC data files logger.info(f"\nInput proton MC directory: {input_dir_proton}") - event_data_proton = load_train_data_files( - input_dir_proton, true_event_class=EVENT_CLASS_PROTON + event_data_proton = load_train_data_files_tel( + input_dir_proton, config, true_event_class=EVENT_CLASS_PROTON ) # Configure the event classifier @@ -298,7 +302,7 @@ def train_event_classifier( logger.info(f"\nUse unsigned features: {use_unsigned_features}") event_classifier = EventClassifier( - config_rf["settings"], config_rf["features"], use_unsigned_features + TEL_NAMES, config_rf["settings"], config_rf["features"], use_unsigned_features ) # Create the output directory @@ -307,11 +311,11 @@ def train_event_classifier( # Loop over every telescope combination type common_combinations = set(event_data_gamma.keys()) & set(event_data_proton.keys()) - for tel_combo in sorted(common_combinations): - logger.info(f"\nEvent classifiers for the '{tel_combo}' type:") + for tel_id in sorted(common_combinations): + logger.info(f"\nEvent classifiers for the telescope ID '{tel_id}':") - df_gamma = event_data_gamma[tel_combo] - df_proton = event_data_proton[tel_combo] + df_gamma = event_data_gamma[tel_id] + df_proton = event_data_proton[tel_id] # Adjust the number of training samples n_events_gamma = len(df_gamma.groupby(GROUP_INDEX_TRAIN).size()) @@ -331,18 +335,18 @@ def train_event_classifier( event_classifier.fit(df_train) # Check the feature importance - for tel_id, telescope_rf in event_classifier.telescope_rfs.items(): - importances = telescope_rf.feature_importances_.round(5) - importances = dict(zip(event_classifier.features, importances)) + telescope_rf = event_classifier.telescope_rfs[tel_id] - logger.info(f"\n{TEL_NAMES[tel_id]} feature importance:") - logger.info(format_object(importances)) + importances = telescope_rf.feature_importances_.round(5) + importances = dict(zip(event_classifier.features, importances)) + logger.info(f"\n{TEL_NAMES[tel_id]} feature importance:") + logger.info(format_object(importances)) # Save the trained RFs to an output file if use_unsigned_features: - output_file = f"{output_dir}/event_classifiers_{tel_combo}_unsigned.joblib" + output_file = f"{output_dir}/event_classifiers_{tel_id}_unsigned.joblib" else: - output_file = f"{output_dir}/event_classifiers_{tel_combo}.joblib" + output_file = f"{output_dir}/event_classifiers_{tel_id}.joblib" event_classifier.save(output_file) diff --git a/magicctapipe/scripts/lst1_magic/magic_calib_to_dl1.py b/magicctapipe/scripts/lst1_magic/magic_calib_to_dl1.py index c24d562ee..ac92621a0 100644 --- a/magicctapipe/scripts/lst1_magic/magic_calib_to_dl1.py +++ b/magicctapipe/scripts/lst1_magic/magic_calib_to_dl1.py @@ -79,12 +79,6 @@ # The pedestal types to find bad RMS pixels PEDESTAL_TYPES = ["fundamental", "from_extractor", "from_extractor_rndm"] -TEL_COMBINATIONS = { - "LST1_M1": [1, 2], # combo_type = 0 - "LST1_M1_M2": [1, 2, 3], # combo_type = 1 - "LST1_M2": [1, 3], # combo_type = 2 - "M1_M2": [2, 3], # combo_type = 3 -} # TODO: REMOVE WHEN SWITCHING TO THE NEW RFs IMPLEMENTTATION (1 RF PER TELESCOPE) def magic_calib_to_dl1(