Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spectroscopy Notebook Updates #281

Merged
merged 7 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions spectroscopy/code_src/desi_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy import nddata

import pandas as pd

Expand Down Expand Up @@ -46,8 +47,8 @@ def DESIBOSS_get_spec(sample_table, search_radius_arcsec):
for stab in sample_table:

## Search
data_releases = ['DESI-EDR','BOSS-DR16']
#data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16']
#data_releases = ['DESI-EDR','BOSS-DR16']
data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16']

search_coords = stab["coord"]
dra = (search_radius_arcsec*u.arcsec).to(u.degree)
Expand Down
250 changes: 243 additions & 7 deletions spectroscopy/code_src/mast_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,236 @@

from specutils import Spectrum1D

import matplotlib.pyplot as plt


def JWST_get_spec(sample_table, search_radius_arcsec, datadir):
    '''
    Retrieves JWST spectra for a list of sources and groups/stacks them.
    This main function runs two sub-functions:
    - JWST_get_spec_helper() which searches, downloads, retrieves the spectra
    - JWST_group_spectra() which groups and stacks the spectra

    Parameters
    ----------
    sample_table : `~astropy.table.Table`
        Table with the coordinates and journal reference labels of the sources
    search_radius_arcsec : `float`
        Search radius in arcseconds.
    datadir : `str`
        Data directory where to store the data. Each function will create a
        separate data directory (for example "[datadir]/JWST/" for JWST data).

    Returns
    -------
    df_spec : MultiIndexDFObject
        The main data structure to store all spectra

    '''

    ## Get the spectra (search MAST, download and open the x1d products)
    print("Searching and Downloading Spectra... ")
    df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir)

    ## Group per object/filter and median-stack similar spectra
    print("Grouping Spectra... ")
    df_jwst_group = JWST_group_spectra(df_jwst_all, verbose=True, quickplot=True)

    return df_jwst_group


def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
    '''
    Retrieves JWST spectra for a list of sources.

    Searches the MAST archive around each source, downloads the public
    level-3/4 1D spectral products (X1D) and packs them into a
    MultiIndexDFObject.

    Parameters
    ----------
    sample_table : `~astropy.table.Table`
        Table with the coordinates and journal reference labels of the sources
    search_radius_arcsec : `float`
        Search radius in arcseconds.
    datadir : `str`
        Data directory where to store the data. Each function will create a
        separate data directory (for example "[datadir]/JWST/" for JWST data).

    Returns
    -------
    df_spec : MultiIndexDFObject
        The main data structure to store all spectra

    '''

    ## Create directory for the downloaded JWST products
    this_data_dir = os.path.join(datadir, "JWST/")

    ## Initialize multi-index object:
    df_spec = MultiIndexDFObject()

    for stab in sample_table:

        print("Processing source {}".format(stab["label"]))

        ## Query MAST for fully-reduced (calib_level 3/4) public JWST spectra
        search_coords = stab["coord"]
        query_results = Observations.query_criteria(
            coordinates=search_coords, radius=search_radius_arcsec * u.arcsec,
            dataproduct_type=["spectrum"], obs_collection=["JWST"],
            intentType="science", calib_level=[3, 4],
            # instrument_name=['NIRSPEC/MSA', 'NIRSPEC/SLIT', 'NIRCAM/GRISM', 'NIRISS/WFSS'],
            instrument_name=['NIRSPEC/MSA', 'NIRSPEC/SLIT'],
            # filters=['CLEAR;PRISM','F070LP;G140M'],
            dataRights=['PUBLIC'])
        print("Number of search results: {}".format(len(query_results)))

        if len(query_results) > 0:  # found some spectra

            ## Retrieve the products belonging to the observations found
            data_products_list = Observations.get_product_list(query_results)

            ## Keep only public, fully-reduced 1D spectra in FITS format
            data_products_list_filter = Observations.filter_products(
                data_products_list,
                productType=["SCIENCE"],
                # filters=['CLEAR;PRISM','F070LP;G140M'],
                # filters=['CLEAR;PRISM'],
                extension="fits",
                calib_level=[3, 4],  # only fully reduced or contributed
                productSubGroupDescription=["X1D"],  # only 1D spectra
                dataRights=['PUBLIC'])  # only public data
            print("Number of files to download: {}".format(len(data_products_list_filter)))

            if len(data_products_list_filter) > 0:

                ## Download
                download_results = Observations.download_products(
                    data_products_list_filter, download_dir=this_data_dir)

                ## Create table
                # NOTE: `download_results` has NOT the same order as `data_products_list_filter`.
                # We therefore have to "manually" get the product file names here and then use
                # those to open the files.
                keys = ["filters", "obs_collection", "instrument_name", "calib_level",
                        "t_obs_release", "proposal_id", "obsid", "objID", "distance"]
                tab = Table(names=keys + ["productFilename"],
                            dtype=[str, str, str, int, float, int, int, int, float] + [str])
                for jj in range(len(data_products_list_filter)):
                    # cross-match product -> observation via obsID to recover metadata
                    idx_cross = np.where(query_results["obsid"] == data_products_list_filter["obsID"][jj])[0]
                    tmp = query_results[idx_cross][keys]
                    tab.add_row(list(tmp[0]) + [data_products_list_filter["productFilename"][jj]])
                print("number in table {}".format(len(tab)))

                ## Create multi-index object
                for jj in range(len(tab)):

                    # find correct path name:
                    # Note that `download_results` does NOT have same order as `tab`!!
                    file_idx = np.where([tab["productFilename"][jj] in download_results["Local Path"][iii]
                                         for iii in range(len(download_results))])[0]

                    # open spectrum
                    filepath = download_results["Local Path"][file_idx[0]]
                    print(filepath)
                    spec1d = Spectrum1D.read(filepath)

                    # no uncertainty propagated here, hence err is set to zeros
                    dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis], flux=[spec1d.flux],
                                                 err=[np.repeat(0, len(spec1d.flux))],
                                                 label=[stab["label"]],
                                                 objectid=[stab["objectid"]],
                                                 # objID=[tab["objID"][jj]], # REMOVE
                                                 # obsid=[tab["obsid"][jj]],
                                                 mission=[tab["obs_collection"][jj]],
                                                 instrument=[tab["instrument_name"][jj]],
                                                 filter=[tab["filters"][jj]],
                                                 )).set_index(["objectid", "label", "filter", "mission"])
                    df_spec.append(dfsingle)

            else:
                print("Nothing to download for source {}.".format(stab["label"]))
        else:
            print("Source {} could not be found".format(stab["label"]))

    return df_spec

def JWST_group_spectra(df, verbose, quickplot):
    '''
    Groups the JWST spectra and removes entries that have no spectra. Stacks
    spectra that are similar and creates new DF.

    Parameters
    ----------
    df : MultiIndexDFObject
        Raw JWST multi-index object (output from `JWST_get_spec()`).
    verbose : bool
        Flag for verbosity: `True` or `False`
    quickplot : bool
        If `True`, quick plots are made for each spectral group.

    Returns
    -------
    df_cons : MultiIndexDFObject
        Consolidated and grouped data structure storing the spectra.

    '''

    ## Initialize multi-index object:
    df_spec = MultiIndexDFObject()

    ## Create data table from DF.
    tab = df.data.reset_index()

    ## Get objects
    objects_unique = np.unique(tab["label"])

    for obj in objects_unique:
        if verbose: print("++Processing Object {} ++".format(obj))

        ## Get filters present for THIS object only (previously this was computed
        ## from the whole table, re-doing loop-invariant work and iterating
        ## filters with no rows for the current object)
        filters_unique = np.unique(tab["filter"][tab["label"] == obj])
        if verbose: print("List of filters in data frame: {}".format(" | ".join(filters_unique)))

        for filt in filters_unique:
            if verbose: print("Processing {}: ".format(filt), end=" ")

            sel = np.where((tab["filter"] == filt) & (tab["label"] == obj))[0]
            tab_sel = tab.iloc[sel]
            if verbose: print("Number of items: {}".format(len(sel)), end=" | ")

            ## Keep only spectra with non-zero total flux ("good" spectra)
            sum_flux = np.asarray([np.nansum(tab_sel.iloc[iii]["flux"]).value
                                   for iii in range(len(tab_sel))])
            idx_good = np.where(sum_flux > 0)[0]
            if verbose: print("Number of good spectra: {}".format(len(idx_good)))

            if len(idx_good) > 0:
                ## Create wavelength grid
                wave_grid = tab_sel.iloc[idx_good[0]]["wave"]  # NEED TO BE MADE BETTER LATER

                ## Interpolate all good spectra onto the common grid and median-stack
                fluxes_int = np.asarray([np.interp(wave_grid, tab_sel.iloc[idx]["wave"],
                                                   tab_sel.iloc[idx]["flux"]) for idx in idx_good])
                fluxes_units = [tab_sel.iloc[idx]["flux"].unit for idx in idx_good]
                fluxes_stack = np.nanmedian(fluxes_int, axis=0)
                if verbose: print("Units of fluxes for each spectrum: {}".format(
                    ",".join([str(tt) for tt in fluxes_units])))

                ## Add to data frame
                # NOTE(review): wavelengths are re-attached with micrometer units and
                # fluxes with the unit of the FIRST good spectrum — assumes all group
                # members share units; TODO confirm. err is zeros (no error propagation).
                dfsingle = pd.DataFrame(dict(wave=[wave_grid * u.micrometer],
                                             flux=[fluxes_stack * fluxes_units[0]],
                                             err=[np.repeat(0, len(fluxes_stack))],
                                             label=[tab_sel["label"].iloc[0]],
                                             objectid=[tab_sel["objectid"].iloc[0]],
                                             mission=[tab_sel["mission"].iloc[0]],
                                             instrument=[tab_sel["instrument"].iloc[0]],
                                             filter=[tab_sel["filter"].iloc[0]],
                                             )).set_index(["objectid", "label", "filter", "mission"])
                df_spec.append(dfsingle)

                ## Quick plot (clipped to the 1st-99th flux percentile for readability)
                if quickplot:
                    tmp = np.percentile(fluxes_stack, q=(1, 50, 99))
                    plt.plot(wave_grid, fluxes_stack)
                    plt.ylim(tmp[0], tmp[2])
                    plt.xlabel(r"Observed Wavelength [$\mu$m]")
                    plt.ylabel(r"Flux [Jy]")
                    plt.show()

    return df_spec

def HST_get_spec(sample_table, search_radius_arcsec, datadir):
'''
Retrieves HST spectra for a list of sources.
Expand Down Expand Up @@ -73,21 +303,27 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
## Download
download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)


## Create table
# NOTE: `download_results` has NOT the same order as `data_products_list_filter`. We therefore
# have to "manually" get the product file names here and then use those to open the files.
keys = ["filters","obs_collection","instrument_name","calib_level","t_obs_release","proposal_id","obsid","objID","distance"]
tab = Table(names=keys , dtype=[str,str,str,int,float,int,int,int,float])
for jj in range(len(download_results)):
tmp = query_results[query_results["obsid"] == data_products_list_filter["obsID"][jj]][keys]
tab.add_row( list(tmp[0]) )
tab = Table(names=keys + ["productFilename"] , dtype=[str,str,str,int,float,int,int,int,float]+[str])
for jj in range(len(data_products_list_filter)):
idx_cross = np.where(query_results["obsid"] == data_products_list_filter["obsID"][jj] )[0]
tmp = query_results[idx_cross][keys]
tab.add_row( list(tmp[0]) + [data_products_list_filter["productFilename"][jj]] )

## Create multi-index object
for jj in range(len(tab)):

# find correct path name:
# Note that `download_results` does NOT have same order as `tab`!!
file_idx = np.where( [ tab["productFilename"][jj] in download_results["Local Path"][iii] for iii in range(len(download_results))] )[0]

# open spectrum
filepath = download_results[jj]["Local Path"]
filepath = download_results["Local Path"][file_idx[0]]
print(filepath)
spec1d = Spectrum1D.read(filepath)
spec1d = Spectrum1D.read(filepath)

dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[np.repeat(0,len(spec1d.flux))],
label=[stab["label"]],
Expand Down
6 changes: 4 additions & 2 deletions spectroscopy/code_src/sample_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#from astroquery.vizier import Vizier


def clean_sample(coords_list, labels_list, verbose=1):
def clean_sample(coords_list, labels_list, precision, verbose=1):
"""Makes a unique sample of skycoords and labels with no repeats. Attaches an object ID to the coords.

Parameters
Expand All @@ -17,6 +17,8 @@ def clean_sample(coords_list, labels_list, verbose=1):
list of Astropy SkyCoords derived from literature sources
labels_list : list
List of the first author name and publication year for tracking the sources
precision : float (astropy units)
Precision of matching/removing doubles. For example 0.5*u.arcsecond.
verbose : int, optional
Print out the length of the sample after applying this function

Expand All @@ -31,7 +33,7 @@ def clean_sample(coords_list, labels_list, verbose=1):
# now join the table with itself within a defined radius.
# We keep one set of original column names to avoid later need for renaming
tjoin = join(sample_table, sample_table, keys='coord',
join_funcs={'coord': join_skycoord(0.005 * u.deg)},
join_funcs={'coord': join_skycoord(precision)},
uniq_col_name='{col_name}{table_name}', table_names=['', '_2'])

# this join will return 4 entries for each redundant coordinate:
Expand Down
8 changes: 5 additions & 3 deletions spectroscopy/code_src/sdss_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from specutils import Spectrum1D

def SDSS_get_spec(sample_table, search_radius_arcsec):
def SDSS_get_spec(sample_table, search_radius_arcsec, data_release):
'''
Retrieves SDSS spectra for a list of sources. Note that no data will
be directly downloaded. All will be saved in cache.
Expand All @@ -25,6 +25,8 @@ def SDSS_get_spec(sample_table, search_radius_arcsec):
Table with the coordinates and journal reference labels of the sources
search_radius_arcsec : `float`
Search radius in arcseconds.
data_release : `int`
SDSS data release (e.g., 17 or 18)

Returns
-------
Expand All @@ -42,10 +44,10 @@ def SDSS_get_spec(sample_table, search_radius_arcsec):
## Get Spectra for SDSS
search_coords = stab["coord"]

xid = SDSS.query_region(search_coords, radius=search_radius_arcsec * u.arcsec, spectro=True)
xid = SDSS.query_region(search_coords, radius=search_radius_arcsec * u.arcsec, spectro=True, data_release=data_release)

if str(type(xid)) != "<class 'NoneType'>":
sp = SDSS.get_spectra(matches=xid, show_progress=True)
sp = SDSS.get_spectra(matches=xid, show_progress=True , data_release=data_release)

## Get data
wave = 10**sp[0]["COADD"].data.loglam * u.angstrom # only one entry because we only search for one xid at a time. Could change that?
Expand Down
Loading