DM-44943: Add scripts to convert DC2 truth tables to injection catalogs #5

Open · wants to merge 5 commits into base: main
176 changes: 176 additions & 0 deletions bin.src/convert_truth_summary_v2_to_injection.py
@@ -0,0 +1,176 @@
import astropy.table
import astropy.units as u
import lsst.daf.butler as dafButler
from lsst.daf.butler.formatters.parquet import arrow_to_astropy, astropy_to_arrow, pa, pq
import numpy as np

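# Location of the DC2 Run2.2i truth summary (v2) parquet files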
truth_summary_path = "/sdf/data/rubin/shared/dc2_run2.2i_truth/truth_summary_cell"

plot = False

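# Bands to write catalogs for, and total-magnitude selection limits:
# objects brighter than the type-dependent minimum or fainter than the
# common maximum are dropped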
bands = ("u", "g", "r", "i", "z", "y")
mag_total_min_star = 17.5
mag_total_min_galaxy = 15
mag_total_max = 26.5

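# Input (DC2) and output (HSC RC2) tracts, skymaps and butler repositories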
tract_in = 3828
tract_out = 9813

skymap_name_in = "DC2_cells_v1"
skymap_name_out = "hsc_rings_v1"

butler_in = dafButler.Butler("/repo/dc2")
butler_out = dafButler.Butler("/repo/main", collections=["HSC/runs/RC2/w_2024_38/DM-46429"])

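# Load both skymaps and their tract info; the tract centers are used below
# to shift the truth positions onto the output tract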
skymap_in, skymap_out = (
butler.get("skyMap", skymap=skymap_name, collections="skymaps")
for butler, skymap_name in ((butler_in, skymap_name_in), (butler_out, skymap_name_out))
)

tractinfo_in, tractinfo_out = (
skymap[tract] for skymap, tract in ((skymap_in, tract_in), (skymap_out, tract_out))
)

(cen_ra_in, cen_dec_in), (cen_ra_out, cen_dec_out) = (
(x.asDegrees() for x in tractinfo.ctr_coord) for tractinfo in (tractinfo_in, tractinfo_out)
)

truth_summary = arrow_to_astropy(pq.read_table(
f"{truth_summary_path}/truth_summary_v2_{tract_in}_{skymap_name_in}_2_2i_truth_summary.parq"
))

# Drop some unneeded columns
for column in ((
"id_string", "host_galaxy", "redshift", "A_V", "R_V",
"tract", "patch", "cosmodc2_hp", "cosmodc2_id",
"ra_unlensed", "dec_unlensed", "redshift_Hubble",
)):
if column in truth_summary.colnames:
del truth_summary[column]

# Stars and galaxies only; no SN
truth_galaxy = truth_summary["truth_type"] == 1
truth_star = truth_summary["truth_type"] == 2
truth_good = truth_star | truth_galaxy
truth_summary = truth_summary[truth_good]

# Cut out very bright objects (with a type-dependent limit) and very faint objects
flux_total = np.sum([truth_summary[f"flux_{band}"] for band in bands], axis=0)
mag_total = u.nJy.to(u.ABmag, flux_total)
truth_out = truth_summary[
(mag_total > (mag_total_min_star*truth_star[truth_good] + mag_total_min_galaxy*truth_galaxy[truth_good]))
& (mag_total < mag_total_max)
]

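# Shift coordinates from the input tract center to the output tract center,
# scaling the RA offset by the ratio of cos(dec) terms so that on-sky
# separations are approximately preserved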
ra_in, dec_in = (truth_out[col] for col in ("ra", "dec"))
dec_out = dec_in + cen_dec_out - cen_dec_in
ra_out = cen_ra_out + (ra_in - cen_ra_in)*np.cos(dec_in*np.pi/180)/np.cos(dec_out*np.pi/180)

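# Optional sanity check: overplot the shifted truth positions on the
# output tract's objectTable coordinates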
if plot:
import matplotlib.pyplot as plt
objects = butler_out.get(
"objectTable_tract", skymap=skymap_name_out, tract=tract_out,
parameters={"columns": ["coord_ra", "coord_dec"]}, storageClass="ArrowAstropy",
)
plt.scatter(ra_out[::20], dec_out[::20], s=1.5)
plt.scatter(objects["coord_ra"][::20], objects["coord_dec"][::20], s=1.5)
plt.show()

is_star = truth_out["truth_type"] == 2
is_galaxy = truth_out["truth_type"] == 1

n_star = np.sum(is_star)
n_galaxy = np.sum(is_galaxy)


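# Order values as stars first, then galaxies twice (one copy per Sersic component)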
def concatenate_star_galaxy(values, is_star, is_galaxy):
values_galaxy = values[is_galaxy]
return np.concatenate((values[is_star], values_galaxy, values_galaxy))


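# Stars are injected as delta functions; each galaxy becomes two Sersic
# components (an n=1 disk and an n=4 bulge)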
source_types = ["DeltaFunction"]*n_star
source_types.extend(["Sersic"]*(2*n_galaxy))

mask_star = np.concatenate((np.ones(n_star, dtype=bool), np.zeros(2*n_galaxy, dtype=bool)))

# GalSim position angles are measured from the x-axis rather than the y-axis (north), hence the 90 degree offset
positionAngles = 90 + truth_out["positionAngle"][is_galaxy]
positionAngles[positionAngles < 0] += 180

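# Assemble the injection catalog columns; Sersic shape parameters are
# masked out for the star rows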
data_out = {
"injection_id": np.arange(2*n_galaxy + n_star),
"group_id": concatenate_star_galaxy(truth_out["id"], is_star, is_galaxy),
"ra": concatenate_star_galaxy(ra_out, is_star, is_galaxy),
"dec": concatenate_star_galaxy(dec_out, is_star, is_galaxy),
"source_type": source_types,
"n": np.ma.masked_array(
np.concatenate((
np.zeros(n_star, dtype=float),
np.ones(n_galaxy, dtype=float),
np.full(n_galaxy, 4.0, dtype=float),
)),
mask=mask_star,
fill_value=np.nan,
),
"half_light_radius": np.ma.masked_array(
np.concatenate((
np.zeros(n_star, dtype=float),
truth_out["diskMajorAxisArcsec"][is_galaxy]*np.sqrt(truth_out["diskAxisRatio"][is_galaxy]),
truth_out["spheroidMajorAxisArcsec"][is_galaxy]*np.sqrt(
truth_out["spheroidAxisRatio"][is_galaxy]
),
)),
mask=mask_star,
fill_value=0.,
),
"q": np.ma.masked_array(
np.concatenate((
np.ones(n_star, dtype=float),
truth_out["diskAxisRatio"][is_galaxy],
truth_out["spheroidAxisRatio"][is_galaxy],
)),
mask=mask_star,
fill_value=1.,
),
"beta": np.ma.masked_array(
np.concatenate((
np.ones(n_star, dtype=float),
positionAngles,
positionAngles,
)),
mask=mask_star,
fill_value=0.,
),
}

table_out = astropy.table.Table(data_out)

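# Attach descriptions and units to the output columns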
for column, (description, unit) in {
"injection_id": ("Injection object ID (row number)", None),
"group_id": ("Injection object ID (row number)", None),
"ra": ("Right ascension", "deg"),
"dec": ("Declination", "deg"),
"source_type": ("Injection source type", None),
"n": ("Sersic index", None),
"half_light_radius": ("Sersic half-light radius [sqrt(a*b)]", u.arcsec),
"q": ("Minor-to-major axis ratio", None),
"beta": ("Position angle", u.deg),
}.items():
column = table_out[column]
column.description = description
if unit is not None:
column.unit = unit

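# Write one injection catalog per band, splitting each galaxy's flux into
# disk and bulge components using its bulge-to-total ratio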
for band in bands:
bulgefrac = truth_out[f"bulge_to_total_{band}"].data.data[is_galaxy]
fluxes = truth_out[f"flux_{band}"]
table_out["mag"] = np.concatenate((
fluxes[is_star].to(u.ABmag),
(fluxes[is_galaxy]*(1-bulgefrac)).to(u.ABmag),
(fluxes[is_galaxy]*bulgefrac).to(u.ABmag),
))
table_out["mag"].description = f"{band}-band magnitude"
table_out["mag"].unit = u.ABmag
filename = f"injection_catalog_{skymap_name_out}_{tract_out}_from_{skymap_name_in}_{band}_{tract_in}.parq"
print(f"Writing {filename}")
pq.write_table(astropy_to_arrow(table_out), filename)
94 changes: 71 additions & 23 deletions bin.src/make_truth_summary_v2.py
@@ -5,6 +5,7 @@
from lsst.daf.butler.formatters.parquet import arrow_to_astropy, astropy_to_arrow, pa, pq
from lsst.geom import SpherePoint, degrees
import GCRCatalogs
# This should be used but doesn't seem to work
# from GCRCatalogs.helpers.tract_catalogs import tract_filter
import numpy as np

@@ -15,22 +16,50 @@
# astropy.__version__

do_mags = False

butler = dafButler.Butler("/repo/dc2")
name_skymap = "DC2_cells_v1"
skymap = butler.get("skyMap", skymap=name_skymap, collections="skymaps")

GCRCatalogs.set_root_dir('/sdf/data/rubin/user/combet')
# Enable to write a catalog for ci_imsim's single patch
is_ci_imsim = False

patches_tracts = {}
tracts_out = {}
if is_ci_imsim:
import os
butler = dafButler.Butler(f"{os.environ['CI_IMSIM_DIR']}/DATA")
name_skymap = "discrete/ci_imsim/4k"
name_out = "ci_imsim"
skymap = butler.get("skyMap", skymap=name_skymap, collections="skymaps")
tracts = (3828,)
patches_tracts[tracts[0]] = (24,)
tracts_out[tracts[0]] = 0
else:
butler = dafButler.Butler("/repo/dc2")
name_skymap = "DC2_cells_v1"
name_out = name_skymap
skymap = butler.get("skyMap", skymap=name_skymap, collections="skymaps")
tracts = (3828, 3829)
patches_tracts = {}
tract_out = {}

path_truth_old = "/sdf/data/rubin/shared/dc2_run2.2i_truth/truth_summary_cell"
GCRCatalogs.set_root_dir('/sdf/data/rubin/shared')
print(f"root dir={GCRCatalogs.get_root_dir()}")

truth = GCRCatalogs.load_catalog('desc_dc2_run2.2i_dr6_truth')
tracts = truth.available_tracts
print(f"Available tracts: {tracts}")
tracts_available = truth.available_tracts
unavailable = set(tracts).difference(set(tracts_available))
if unavailable:
raise RuntimeError(f"tracts={unavailable} not in {tracts_available}")

truth_quantities = truth.list_all_quantities(include_native=True)
truth_columninfo = {tq: truth.get_quantity_info(tq) for tq in truth_quantities}
truth_columninfo = truth.list_all_quantities(include_native=True, with_info=True)
truth_columninfo['av'] = {'unit': 'mag'}
truth_columninfo['rv'] = {'unit': 'mag'}
truth_columninfo['is_pointsource'] = {
'description': 'Whether the object is a point source (unresolved)',
'unit': '',
}
truth_columninfo['is_variable'] = {
'description': 'Whether the object is variable',
'unit': '',
}

cosmodc2_cat = GCRCatalogs.load_catalog("desc_cosmodc2")
cosmodc2_quantities = cosmodc2_cat.list_all_quantities(include_native=True)
@@ -61,8 +90,10 @@
'dec_true': 'dec_unlensed',
'redshiftHubble': 'redshift_Hubble',
}
# Although there is a morphology/positionAngle column, it was not used in DC2
# Instead, randomly-generated angles were used
key_list = (
{"morphology/positionAngle": "positionAngle"},
{"position_angle_true_dc2": "positionAngle"},
diskmorph_keys, spheroidmorph_keys,
disklum_keys, spheroidlum_keys,
mag_keys, other_keys
@@ -98,7 +129,7 @@
}
truth_columninfo["id_string"] = {"description": "Original string id", "unit": None}
cosmodc2_columninfo = {
"morphology/positionAngle": {
"position_angle_true_dc2": {
"description": "Position angle relative to north (+Dec) towards east (+RA)",
"unit": "deg",
},
@@ -132,14 +163,22 @@
}
}

tracts = (3828, 3829)

for tract in tracts:
print(f"==== Reading default truth catalog for tract={tract} ====")
# These already have new integer ID columns
truth = arrow_to_astropy(pq.read_table(
f"/sdf/group/rubin/ncsa-project/project/shared/DC2/truth_summary_v2/truth_tract{tract}.parquet")
f"/sdf/data/rubin/shared/dc2_run2.2i_truth/truth_summary_integer_ids/truth_tract{tract}.parquet")
)
# For some reason, is_pointsource and is_variable aren't in the newer tables
truth_old = arrow_to_astropy(pq.read_table(
f"{path_truth_old}/truth_summary_{tract}_DC2_cells_v1_2_2i_truth_summary.parq",
columns=["id", "is_pointsource", "is_variable"],
))
truth_old.rename_column("id", "id_string")
# A few rows differ between these tables, so we can't just copy
# columns across. Not sure why...
truth = join(truth, truth_old, keys="id_string", join_type="inner")

filt = truth["is_unique_truth_entry"]
truth = truth[filt]
if not do_mags:
@@ -174,9 +213,9 @@
# Re-order columns
cosmodc2 = cosmodc2[columns_load]
for column in columns_load:
units = cosmodc2_cat.get_quantity_info(column)['units']
if units:
cosmodc2[column].units = units
if (quantinfo := cosmodc2_cat.get_quantity_info(column)) is not None:
if units := quantinfo.get('units'):
cosmodc2[column].units = units
# Apply column info overrides
for column_name, info in cosmodc2_columninfo.items():
if (column := cosmodc2.columns.get(column_name)) is not None:
@@ -212,18 +251,27 @@
)

print("==== Populating patch column ====")
tractinfo = skymap[tract]
radecs = [SpherePoint(ra, dec, degrees) for ra, dec in zip(truth["ra"], truth["dec"])]
tract_out = tracts_out.get(tract, tract)
tractinfo = skymap[tract_out]
radecs = [SpherePoint(ra, dec, degrees) for ra, dec in zip(merged_table["ra"], merged_table["dec"])]
patches = [tractinfo.findPatch(radec).sequential_index for radec in radecs]
merged_table["patch"] = patches
merged_table["patch"].description = f"The patch number in {skymap=}"

filename_out = f"truth_summary_v2_{tract}_{name_skymap}_2_2i_truth_summary.parq"
patches_tract = patches_tracts.get(tract)
if patches_tract:
patches = merged_table["patch"]
patch_good = np.zeros(len(merged_table), dtype=bool)
for patch in patches_tract:
patch_good |= patches == patch
merged_table = merged_table[patch_good]

filename_out = f"truth_summary_v2_{tract_out}_{name_out}_2_2i_truth_summary.parq"
pq.write_table(astropy_to_arrow(merged_table), filename_out)

print("==== Done ====")
print(f"==== Wrote {filename_out} ====")

print("==== Read in new merged table file and check for units and descriptions ====")
print("==== Reading in new merged table file and check for units and descriptions ====")

pa_table = pa.parquet.read_table(filename_out)
ap_table = arrow_to_astropy(pa_table)