Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spectroscopy Notebook Updates #281

Merged
merged 7 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions spectroscopy/code_src/desi_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy import nddata

import pandas as pd

Expand Down Expand Up @@ -46,8 +47,8 @@ def DESIBOSS_get_spec(sample_table, search_radius_arcsec):
for stab in sample_table:

## Search
data_releases = ['DESI-EDR','BOSS-DR16']
#data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16']
#data_releases = ['DESI-EDR','BOSS-DR16']
data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16']

search_coords = stab["coord"]
dra = (search_radius_arcsec*u.arcsec).to(u.degree)
Expand Down
250 changes: 243 additions & 7 deletions spectroscopy/code_src/mast_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,236 @@

from specutils import Spectrum1D

import matplotlib.pyplot as plt


def JWST_get_spec(sample_table, search_radius_arcsec, datadir):
    '''
    Retrieves JWST spectra for a list of sources and groups/stacks them.
    This main function runs two sub-functions:
    - JWST_get_spec_helper() which searches, downloads, retrieves the spectra
    - JWST_group_spectra() which groups and stacks the spectra

    Parameters
    ----------
    sample_table : `~astropy.table.Table`
        Table with the coordinates and journal reference labels of the sources
    search_radius_arcsec : `float`
        Search radius in arcseconds.
    datadir : `str`
        Data directory where to store the data. Each function will create a
        separate data directory (for example "[datadir]/JWST/" for JWST data).

    Returns
    -------
    df_spec : MultiIndexDFObject
        The main data structure to store all spectra

    '''

    ## Get the spectra (search MAST, download and open the x1d products)
    print("Searching and Downloading Spectra... ")
    df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir)

    ## Group per object/filter and median-stack similar spectra
    print("Grouping Spectra... ")
    df_jwst_group = JWST_group_spectra(df_jwst_all, verbose=True, quickplot=True)

    return df_jwst_group


def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
    '''
    Retrieves JWST spectra for a list of sources.

    Searches the MAST archive around each source, downloads the public
    level-3/4 1D spectral products (X1D) and packs them into a
    MultiIndexDFObject.

    Parameters
    ----------
    sample_table : `~astropy.table.Table`
        Table with the coordinates and journal reference labels of the sources
    search_radius_arcsec : `float`
        Search radius in arcseconds.
    datadir : `str`
        Data directory where to store the data. Each function will create a
        separate data directory (for example "[datadir]/JWST/" for JWST data).

    Returns
    -------
    df_spec : MultiIndexDFObject
        The main data structure to store all spectra

    '''

    ## Create directory for the downloaded JWST products
    this_data_dir = os.path.join(datadir, "JWST/")

    ## Initialize multi-index object:
    df_spec = MultiIndexDFObject()

    for stab in sample_table:

        print("Processing source {}".format(stab["label"]))

        ## Query MAST for fully-reduced (calib_level 3/4) public JWST spectra
        search_coords = stab["coord"]
        query_results = Observations.query_criteria(
            coordinates=search_coords, radius=search_radius_arcsec * u.arcsec,
            dataproduct_type=["spectrum"], obs_collection=["JWST"],
            intentType="science", calib_level=[3, 4],
            # instrument_name=['NIRSPEC/MSA', 'NIRSPEC/SLIT', 'NIRCAM/GRISM', 'NIRISS/WFSS'],
            instrument_name=['NIRSPEC/MSA', 'NIRSPEC/SLIT'],
            # filters=['CLEAR;PRISM','F070LP;G140M'],
            dataRights=['PUBLIC'])
        print("Number of search results: {}".format(len(query_results)))

        if len(query_results) > 0:  # found some spectra

            ## Retrieve the products belonging to the observations found
            data_products_list = Observations.get_product_list(query_results)

            ## Keep only public, fully-reduced 1D spectra in FITS format
            data_products_list_filter = Observations.filter_products(
                data_products_list,
                productType=["SCIENCE"],
                # filters=['CLEAR;PRISM','F070LP;G140M'],
                # filters=['CLEAR;PRISM'],
                extension="fits",
                calib_level=[3, 4],  # only fully reduced or contributed
                productSubGroupDescription=["X1D"],  # only 1D spectra
                dataRights=['PUBLIC'])  # only public data
            print("Number of files to download: {}".format(len(data_products_list_filter)))

            if len(data_products_list_filter) > 0:

                ## Download
                download_results = Observations.download_products(
                    data_products_list_filter, download_dir=this_data_dir)

                ## Create table
                # NOTE: `download_results` has NOT the same order as `data_products_list_filter`.
                # We therefore have to "manually" get the product file names here and then use
                # those to open the files.
                keys = ["filters", "obs_collection", "instrument_name", "calib_level",
                        "t_obs_release", "proposal_id", "obsid", "objID", "distance"]
                tab = Table(names=keys + ["productFilename"],
                            dtype=[str, str, str, int, float, int, int, int, float] + [str])
                for jj in range(len(data_products_list_filter)):
                    # cross-match product -> observation via obsID to recover metadata
                    idx_cross = np.where(query_results["obsid"] == data_products_list_filter["obsID"][jj])[0]
                    tmp = query_results[idx_cross][keys]
                    tab.add_row(list(tmp[0]) + [data_products_list_filter["productFilename"][jj]])
                print("number in table {}".format(len(tab)))

                ## Create multi-index object
                for jj in range(len(tab)):

                    # find correct path name:
                    # Note that `download_results` does NOT have same order as `tab`!!
                    file_idx = np.where([tab["productFilename"][jj] in download_results["Local Path"][iii]
                                         for iii in range(len(download_results))])[0]

                    # open spectrum
                    filepath = download_results["Local Path"][file_idx[0]]
                    print(filepath)
                    spec1d = Spectrum1D.read(filepath)

                    # no uncertainty propagated here, hence err is set to zeros
                    dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis], flux=[spec1d.flux],
                                                 err=[np.repeat(0, len(spec1d.flux))],
                                                 label=[stab["label"]],
                                                 objectid=[stab["objectid"]],
                                                 # objID=[tab["objID"][jj]], # REMOVE
                                                 # obsid=[tab["obsid"][jj]],
                                                 mission=[tab["obs_collection"][jj]],
                                                 instrument=[tab["instrument_name"][jj]],
                                                 filter=[tab["filters"][jj]],
                                                 )).set_index(["objectid", "label", "filter", "mission"])
                    df_spec.append(dfsingle)

            else:
                print("Nothing to download for source {}.".format(stab["label"]))
        else:
            print("Source {} could not be found".format(stab["label"]))

    return df_spec

def JWST_group_spectra(df, verbose, quickplot):
    '''
    Groups the JWST spectra and removes entries that have no spectra. Stacks
    spectra that are similar and creates new DF.

    Parameters
    ----------
    df : MultiIndexDFObject
        Raw JWST multi-index object (output from `JWST_get_spec()`).
    verbose : bool
        Flag for verbosity: `True` or `False`
    quickplot : bool
        If `True`, quick plots are made for each spectral group.

    Returns
    -------
    df_cons : MultiIndexDFObject
        Consolidated and grouped data structure storing the spectra.

    '''

    ## Initialize multi-index object:
    df_spec = MultiIndexDFObject()

    ## Create data table from DF.
    tab = df.data.reset_index()

    ## Get objects
    objects_unique = np.unique(tab["label"])

    for obj in objects_unique:
        if verbose: print("++Processing Object {} ++".format(obj))

        ## Get filters present for THIS object only (previously this was computed
        ## from the whole table, re-doing loop-invariant work and iterating
        ## filters with no rows for the current object)
        filters_unique = np.unique(tab["filter"][tab["label"] == obj])
        if verbose: print("List of filters in data frame: {}".format(" | ".join(filters_unique)))

        for filt in filters_unique:
            if verbose: print("Processing {}: ".format(filt), end=" ")

            sel = np.where((tab["filter"] == filt) & (tab["label"] == obj))[0]
            tab_sel = tab.iloc[sel]
            if verbose: print("Number of items: {}".format(len(sel)), end=" | ")

            ## Keep only spectra with non-zero total flux ("good" spectra)
            sum_flux = np.asarray([np.nansum(tab_sel.iloc[iii]["flux"]).value
                                   for iii in range(len(tab_sel))])
            idx_good = np.where(sum_flux > 0)[0]
            if verbose: print("Number of good spectra: {}".format(len(idx_good)))

            if len(idx_good) > 0:
                ## Create wavelength grid
                wave_grid = tab_sel.iloc[idx_good[0]]["wave"]  # NEED TO BE MADE BETTER LATER

                ## Interpolate all good spectra onto the common grid and median-stack
                fluxes_int = np.asarray([np.interp(wave_grid, tab_sel.iloc[idx]["wave"],
                                                   tab_sel.iloc[idx]["flux"]) for idx in idx_good])
                fluxes_units = [tab_sel.iloc[idx]["flux"].unit for idx in idx_good]
                fluxes_stack = np.nanmedian(fluxes_int, axis=0)
                if verbose: print("Units of fluxes for each spectrum: {}".format(
                    ",".join([str(tt) for tt in fluxes_units])))

                ## Add to data frame
                # NOTE(review): wavelengths are re-attached with micrometer units and
                # fluxes with the unit of the FIRST good spectrum — assumes all group
                # members share units; TODO confirm. err is zeros (no error propagation).
                dfsingle = pd.DataFrame(dict(wave=[wave_grid * u.micrometer],
                                             flux=[fluxes_stack * fluxes_units[0]],
                                             err=[np.repeat(0, len(fluxes_stack))],
                                             label=[tab_sel["label"].iloc[0]],
                                             objectid=[tab_sel["objectid"].iloc[0]],
                                             mission=[tab_sel["mission"].iloc[0]],
                                             instrument=[tab_sel["instrument"].iloc[0]],
                                             filter=[tab_sel["filter"].iloc[0]],
                                             )).set_index(["objectid", "label", "filter", "mission"])
                df_spec.append(dfsingle)

                ## Quick plot (clipped to the 1st-99th flux percentile for readability)
                if quickplot:
                    tmp = np.percentile(fluxes_stack, q=(1, 50, 99))
                    plt.plot(wave_grid, fluxes_stack)
                    plt.ylim(tmp[0], tmp[2])
                    plt.xlabel(r"Observed Wavelength [$\mu$m]")
                    plt.ylabel(r"Flux [Jy]")
                    plt.show()

    return df_spec

def HST_get_spec(sample_table, search_radius_arcsec, datadir):
'''
Retrieves HST spectra for a list of sources.
Expand Down Expand Up @@ -73,21 +303,27 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
## Download
download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)


## Create table
# NOTE: `download_results` has NOT the same order as `data_products_list_filter`. We therefore
# have to "manually" get the product file names here and then use those to open the files.
keys = ["filters","obs_collection","instrument_name","calib_level","t_obs_release","proposal_id","obsid","objID","distance"]
tab = Table(names=keys , dtype=[str,str,str,int,float,int,int,int,float])
for jj in range(len(download_results)):
tmp = query_results[query_results["obsid"] == data_products_list_filter["obsID"][jj]][keys]
tab.add_row( list(tmp[0]) )
tab = Table(names=keys + ["productFilename"] , dtype=[str,str,str,int,float,int,int,int,float]+[str])
for jj in range(len(data_products_list_filter)):
idx_cross = np.where(query_results["obsid"] == data_products_list_filter["obsID"][jj] )[0]
tmp = query_results[idx_cross][keys]
tab.add_row( list(tmp[0]) + [data_products_list_filter["productFilename"][jj]] )

## Create multi-index object
for jj in range(len(tab)):

# find correct path name:
# Note that `download_results` does NOT have same order as `tab`!!
file_idx = np.where( [ tab["productFilename"][jj] in download_results["Local Path"][iii] for iii in range(len(download_results))] )[0]

# open spectrum
filepath = download_results[jj]["Local Path"]
filepath = download_results["Local Path"][file_idx[0]]
print(filepath)
spec1d = Spectrum1D.read(filepath)
spec1d = Spectrum1D.read(filepath)

dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[np.repeat(0,len(spec1d.flux))],
label=[stab["label"]],
Expand Down
6 changes: 4 additions & 2 deletions spectroscopy/code_src/sample_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#from astroquery.vizier import Vizier


def clean_sample(coords_list, labels_list, verbose=1):
def clean_sample(coords_list, labels_list, precision, verbose=1):
"""Makes a unique sample of skycoords and labels with no repeats. Attaches an object ID to the coords.

Parameters
Expand All @@ -17,6 +17,8 @@ def clean_sample(coords_list, labels_list, verbose=1):
list of Astropy SkyCoords derived from literature sources
labels_list : list
List of the first author name and publication year for tracking the sources
precision : float (astropy units)
Precision of matching/removing doubles. For example 0.5*u.arcsecond.
verbose : int, optional
Print out the length of the sample after applying this function

Expand All @@ -31,7 +33,7 @@ def clean_sample(coords_list, labels_list, verbose=1):
# now join the table with itself within a defined radius.
# We keep one set of original column names to avoid later need for renaming
tjoin = join(sample_table, sample_table, keys='coord',
join_funcs={'coord': join_skycoord(0.005 * u.deg)},
join_funcs={'coord': join_skycoord(precision)},
uniq_col_name='{col_name}{table_name}', table_names=['', '_2'])

# this join will return 4 entries for each redundant coordinate:
Expand Down
8 changes: 5 additions & 3 deletions spectroscopy/code_src/sdss_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from specutils import Spectrum1D

def SDSS_get_spec(sample_table, search_radius_arcsec):
def SDSS_get_spec(sample_table, search_radius_arcsec, data_release):
'''
Retrieves SDSS spectra for a list of sources. Note that no data will
be directly downloaded. All will be saved in cache.
Expand All @@ -25,6 +25,8 @@ def SDSS_get_spec(sample_table, search_radius_arcsec):
Table with the coordinates and journal reference labels of the sources
search_radius_arcsec : `float`
Search radius in arcseconds.
data_release : `int`
SDSS data release (e.g., 17 or 18)

Returns
-------
Expand All @@ -42,10 +44,10 @@ def SDSS_get_spec(sample_table, search_radius_arcsec):
## Get Spectra for SDSS
search_coords = stab["coord"]

xid = SDSS.query_region(search_coords, radius=search_radius_arcsec * u.arcsec, spectro=True)
xid = SDSS.query_region(search_coords, radius=search_radius_arcsec * u.arcsec, spectro=True, data_release=data_release)

if str(type(xid)) != "<class 'NoneType'>":
sp = SDSS.get_spectra(matches=xid, show_progress=True)
sp = SDSS.get_spectra(matches=xid, show_progress=True , data_release=data_release)

## Get data
wave = 10**sp[0]["COADD"].data.loglam * u.angstrom # only one entry because we only search for one xid at a time. Could change that?
Expand Down
Loading