Skip to content

Commit

Permalink
Making multi-level indexing optional
Browse files Browse the repository at this point in the history
This modification addresses issue #25 by adding an optional multi_index parameter to several functions. When multi_index=False, the output is a dataframe with a single-level index (datetime), regardless of the number of sites being queried.
  • Loading branch information
cjbas22 committed Oct 4, 2022
1 parent 322f507 commit 015992c
Showing 1 changed file with 33 additions and 20 deletions.
53 changes: 33 additions & 20 deletions dataretrieval/nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@
WATERDATA_SERVICES = ['qwdata', 'measurements', 'peaks', 'pmcodes', 'water_use', 'ratings']


def format_response(df, service=None):
def format_response(df, multi_index=None, service=None):
"""Setup index for response from query.
"""
if multi_index:
multi_index=None

if service == 'peaks':
df = preformat_peaks_response(df)

Expand All @@ -35,7 +38,7 @@ def format_response(df, service=None):
# XXX: consider making site_no index
return df

elif len(df['site_no'].unique()) > 1:
elif len(df['site_no'].unique()) > 1 and multi_index is None:
# setup multi-index
df.set_index(['site_no', 'datetime'], inplace=True)
if hasattr(df.index.levels[1], 'tzinfo') and df.index.levels[1].tzinfo is None:
Expand All @@ -55,7 +58,8 @@ def preformat_peaks_response(df):
return df


def get_qwdata(datetime_index=True, wide_format=True, sites=None, start=None, end=None, **kwargs):
def get_qwdata(datetime_index=True, wide_format=True, sites=None,
start=None, end=None, multi_index=None,**kwargs):
"""
Get water sample data from qwdata service.
Expand All @@ -71,6 +75,8 @@ def get_qwdata(datetime_index=True, wide_format=True, sites=None, start=None, en
If the qwdata parameter begin_date is supplied, it will overwrite the start parameter
end: string
If the qwdata parameter end_date is supplied, it will overwrite the end parameter
multi_index: boolean
If False, a dataframe with a single-level index (datetime) is returned
Returns:
DataFrame containing times series data from the NWIS json and Metadata as tuple
Expand All @@ -81,9 +87,11 @@ def get_qwdata(datetime_index=True, wide_format=True, sites=None, start=None, en
end = kwargs.pop('end_date', end)
sites = kwargs.pop('site_no', sites)
return _qwdata(site_no=sites, begin_date=start, end_date=end, datetime_index=datetime_index,
** kwargs)
multi_index=multi_index, ** kwargs)

def _qwdata(datetime_index=True, **kwargs):
mi = kwargs.pop('multi_index')

# check number of sites, may need to create multiindex

payload = {'agency_cd': 'USGS',
Expand Down Expand Up @@ -124,7 +132,7 @@ def _qwdata(datetime_index=True, **kwargs):
df = format_datetime(df, 'sample_dt', 'sample_tm',
'sample_start_time_datum_cd')

df = format_response(df)
df = format_response(df, mi)
return df, _set_metadata(response, **kwargs)


Expand Down Expand Up @@ -155,7 +163,7 @@ def _discharge_measurements(**kwargs):
return _read_rdb(response.text), _set_metadata(response, **kwargs)


def get_discharge_peaks(sites=None, start=None, end=None, **kwargs):
def get_discharge_peaks(sites=None, start=None, end=None, multi_index=None, **kwargs):
"""
Get discharge peaks from the waterdata service.
Expand All @@ -174,18 +182,19 @@ def get_discharge_peaks(sites=None, start=None, end=None, **kwargs):
start = kwargs.pop('begin_date', start)
end = kwargs.pop('end_date', end)
sites = kwargs.pop('site_no', sites)
return _discharge_peaks(site_no=sites, begin_date=start, end_date=end, **kwargs)
return _discharge_peaks(site_no=sites, begin_date=start, end_date=end, multi_index=multi_index, **kwargs)


def _discharge_peaks(**kwargs):
    """Query the 'peaks' waterdata service and format the response.

    Args:
        **kwargs: query parameters forwarded to ``query_waterdata``. An
            optional ``multi_index`` entry is removed before the query is
            issued and handed to ``format_response`` instead.

    Returns:
        Tuple of the formatted response and the value returned by
        ``_set_metadata`` for this request.
    """
    # Use a default so callers that invoke this helper without a
    # multi_index entry do not fail with a KeyError.
    mi = kwargs.pop('multi_index', None)
    response = query_waterdata('peaks', format='rdb', **kwargs)

    df = _read_rdb(response.text)

    # Single return that actually forwards the index preference.
    return format_response(df, mi, service='peaks'), _set_metadata(response, **kwargs)


def get_gwlevels(start='1851-01-01', end=None, **kwargs):
def get_gwlevels(start='1851-01-01', end=None, multi_index=None, **kwargs):
"""
Queries the groundwater level service from waterservices
Expand All @@ -202,16 +211,18 @@ def get_gwlevels(start='1851-01-01', end=None, **kwargs):
"""
start = kwargs.pop('startDT', start)
end = kwargs.pop('endDT', end)
return _gwlevels(startDT=start, endDT=end, **kwargs)
return _gwlevels(startDT=start, endDT=end, multi_index=multi_index, **kwargs)


def _gwlevels(**kwargs):
    """Query the 'gwlevels' waterservices endpoint and format the response.

    Args:
        **kwargs: query parameters forwarded to ``query_waterservices``. An
            optional ``multi_index`` entry is removed before the query is
            issued and handed to ``format_response`` instead.

    Returns:
        Tuple of the formatted response and the value returned by
        ``_set_metadata`` for this request.
    """
    # Use a default so callers that invoke this helper without a
    # multi_index entry do not fail with a KeyError.
    mi = kwargs.pop('multi_index', None)

    response = query_waterservices('gwlevels', **kwargs)

    df = _read_rdb(response.text)
    # Build the 'datetime' column from the level date/time/timezone columns.
    df = format_datetime(df, 'lev_dt', 'lev_tm', 'lev_tz_cd')

    # Single return that actually forwards the index preference.
    return format_response(df, mi), _set_metadata(response, **kwargs)


def get_stats(sites, **kwargs):
Expand Down Expand Up @@ -296,7 +307,7 @@ def query_waterservices(service, **kwargs):
return query(url, payload=kwargs)


def get_dv(start=None, end=None, **kwargs):
def get_dv(start=None, end=None, multi_index=None, **kwargs):
"""
Get daily values data from NWIS and return it as a DataFrame.
Expand All @@ -314,14 +325,15 @@ def get_dv(start=None, end=None, **kwargs):
"""
start = kwargs.pop('startDT', start)
end = kwargs.pop('endDT', end)
return _dv(startDT=start, endDT=end, **kwargs)
return _dv(startDT=start, endDT=end, multi_index=multi_index, **kwargs)


def _dv(**kwargs):
    """Query the 'dv' waterservices endpoint and return its parsed response.

    Args:
        **kwargs: query parameters forwarded to ``query_waterservices``. An
            optional ``multi_index`` entry is removed before the query is
            issued and handed to ``_read_json`` instead.

    Returns:
        Tuple of the parsed response and the value returned by
        ``_set_metadata`` for this request.
    """
    # Use a default so callers that invoke this helper without a
    # multi_index entry do not fail with a KeyError.
    mi = kwargs.pop('multi_index', None)

    response = query_waterservices('dv', format='json', **kwargs)
    # Parse the payload once, forwarding the index preference.
    df = _read_json(response.json(), mi)

    # NOTE(review): _read_json already passes its result through
    # format_response; this second call is kept from the original —
    # confirm whether it is still needed.
    df = format_response(df)
    return df, _set_metadata(response, **kwargs)


Expand Down Expand Up @@ -413,7 +425,7 @@ def get_info(**kwargs):
return _read_rdb(response.text), _set_metadata(response, **kwargs)


def get_iv(start=None, end=None, **kwargs):
def get_iv(start=None, end=None, multi_index=None, **kwargs):
"""Get instantaneous values data from NWIS and return it as a DataFrame.
Note: If no start or end date are provided, only the most recent record is returned.
Expand All @@ -430,12 +442,13 @@ def get_iv(start=None, end=None, **kwargs):
"""
start = kwargs.pop('startDT', start)
end = kwargs.pop('endDT', end)
return _iv(startDT=start, endDT=end, **kwargs)
return _iv(startDT=start, endDT=end, multi_index=multi_index, **kwargs)


def _iv(**kwargs):
    """Query the 'iv' waterservices endpoint and return its parsed response.

    Args:
        **kwargs: query parameters forwarded to ``query_waterservices``. An
            optional ``multi_index`` entry is removed before the query is
            issued and handed to ``_read_json`` instead.

    Returns:
        Tuple of the parsed response and the value returned by
        ``_set_metadata`` for this request.
    """
    # Use a default so callers that invoke this helper without a
    # multi_index entry do not fail with a KeyError.
    mi = kwargs.pop('multi_index', None)
    response = query_waterservices('iv', format='json', **kwargs)
    # Single return that actually forwards the index preference.
    return _read_json(response.json(), mi), _set_metadata(response, **kwargs)


def get_pmcodes(parameterCd = 'All', partial = True):
Expand Down Expand Up @@ -649,7 +662,7 @@ def get_record(sites=None, start=None, end=None, state=None,
raise TypeError('{} service not yet implemented'.format(service))


def _read_json(json, multi_index=False):
def _read_json(json, multi_index=None):
"""
Reads a NWIS Water Services formatted JSON into a DataFrame.
Expand Down Expand Up @@ -714,7 +727,7 @@ def _read_json(json, multi_index=False):
merged_df = update_merge(merged_df, record_df, na_only=True,
on=['site_no', 'datetime'])

merged_df = format_response(merged_df)
merged_df = format_response(merged_df, multi_index)
return merged_df


Expand Down

0 comments on commit 015992c

Please sign in to comment.