From 015992c65c5746a54dcb3c38d2f37845962326cc Mon Sep 17 00:00:00 2001 From: cjbas22 <35705452+cjbas22@users.noreply.github.com> Date: Tue, 4 Oct 2022 16:58:00 -0600 Subject: [PATCH] Making multi-level indexing optional This modification addresses issue #25 by adding an additional parameter multi_index to some functions. When multi_index=False the output will be a dataframe with a single-level index (datetime) independently of the number of sites being queried. --- dataretrieval/nwis.py | 53 +++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 2de2924..19d5262 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -24,9 +24,12 @@ WATERDATA_SERVICES = ['qwdata', 'measurements', 'peaks', 'pmcodes', 'water_use', 'ratings'] -def format_response(df, service=None): +def format_response(df, multi_index=None, service=None): """Setup index for response from query. """ + if multi_index: + multi_index=None + if service == 'peaks': df = preformat_peaks_response(df) @@ -35,7 +38,7 @@ def format_response(df, service=None): # XXX: consider making site_no index return df - elif len(df['site_no'].unique()) > 1: + elif len(df['site_no'].unique()) > 1 and multi_index is None: # setup multi-index df.set_index(['site_no', 'datetime'], inplace=True) if hasattr(df.index.levels[1], 'tzinfo') and df.index.levels[1].tzinfo is None: @@ -55,7 +58,8 @@ def preformat_peaks_response(df): return df -def get_qwdata(datetime_index=True, wide_format=True, sites=None, start=None, end=None, **kwargs): +def get_qwdata(datetime_index=True, wide_format=True, sites=None, + start=None, end=None, multi_index=None,**kwargs): """ Get water sample data from qwdata service. @@ -71,6 +75,8 @@ def get_qwdata(datetime_index=True, wide_format=True, sites=None, start=None, en If the qwdata parameter begin_date is supplied, it will overwrite the start parameter end: string If the qwdata parameter end_date is supplied, it will overwrite the end parameter + multi_index: boolean + If False, a dataframe with a single-level index (datetime) is returned Returns: DataFrame containing times series data from the NWIS json and Metadata as tuple @@ -81,9 +87,11 @@ def get_qwdata(datetime_index=True, wide_format=True, sites=None, start=None, en end = kwargs.pop('end_date', end) sites = kwargs.pop('site_no', sites) return _qwdata(site_no=sites, begin_date=start, end_date=end, datetime_index=datetime_index, - ** kwargs) + multi_index=multi_index, ** kwargs) def _qwdata(datetime_index=True, **kwargs): + mi = kwargs.pop('multi_index') + # check number of sites, may need to create multiindex payload = {'agency_cd': 'USGS', @@ -124,7 +132,7 @@ def _qwdata(datetime_index=True, **kwargs): df = format_datetime(df, 'sample_dt', 'sample_tm', 'sample_start_time_datum_cd') - df = format_response(df) + df = format_response(df, mi) return df, _set_metadata(response, **kwargs) @@ -155,7 +163,7 @@ def _discharge_measurements(**kwargs): return _read_rdb(response.text), _set_metadata(response, **kwargs) -def get_discharge_peaks(sites=None, start=None, end=None, **kwargs): +def get_discharge_peaks(sites=None, start=None, end=None, multi_index=None, **kwargs): """ Get discharge peaks from the waterdata service. @@ -174,18 +182,19 @@ def get_discharge_peaks(sites=None, start=None, end=None, **kwargs): start = kwargs.pop('begin_date', start) end = kwargs.pop('end_date', end) sites = kwargs.pop('site_no', sites) - return _discharge_peaks(site_no=sites, begin_date=start, end_date=end, **kwargs) + return _discharge_peaks(site_no=sites, begin_date=start, end_date=end, multi_index=multi_index, **kwargs) def _discharge_peaks(**kwargs): + mi = kwargs.pop('multi_index') response = query_waterdata('peaks', format='rdb', **kwargs) df = _read_rdb(response.text) - return format_response(df, service='peaks'), _set_metadata(response, **kwargs) + return format_response(df, mi, service='peaks'), _set_metadata(response, **kwargs) -def get_gwlevels(start='1851-01-01', end=None, **kwargs): +def get_gwlevels(start='1851-01-01', end=None, multi_index=None, **kwargs): """ Querys the groundwater level service from waterservices @@ -202,16 +211,18 @@ def get_gwlevels(start='1851-01-01', end=None, **kwargs): """ start = kwargs.pop('startDT', start) end = kwargs.pop('endDT', end) - return _gwlevels(startDT=start, endDT=end, **kwargs) + return _gwlevels(startDT=start, endDT=end, multi_index=multi_index, **kwargs) def _gwlevels(**kwargs): + mi = kwargs.pop('multi_index') + response = query_waterservices('gwlevels', **kwargs) df = _read_rdb(response.text) df = format_datetime(df, 'lev_dt', 'lev_tm', 'lev_tz_cd') - return format_response(df), _set_metadata(response, **kwargs) + return format_response(df, mi), _set_metadata(response, **kwargs) def get_stats(sites, **kwargs): @@ -296,7 +307,7 @@ def query_waterservices(service, **kwargs): return query(url, payload=kwargs) -def get_dv(start=None, end=None, **kwargs): +def get_dv(start=None, end=None, multi_index=None, **kwargs): """ Get daily values data from NWIS and return it as a DataFrame. @@ -314,14 +325,15 @@ def get_dv(start=None, end=None, **kwargs): """ start = kwargs.pop('startDT', start) end = kwargs.pop('endDT', end) - return _dv(startDT=start, endDT=end, **kwargs) + return _dv(startDT=start, endDT=end, multi_index=multi_index, **kwargs) def _dv(**kwargs): + mi = kwargs.pop('multi_index') + response = query_waterservices('dv', format='json', **kwargs) - df = _read_json(response.json()) + df = _read_json(response.json(), mi) - df = format_response(df) return df, _set_metadata(response, **kwargs) @@ -413,7 +425,7 @@ def get_info(**kwargs): return _read_rdb(response.text), _set_metadata(response, **kwargs) -def get_iv(start=None, end=None, **kwargs): +def get_iv(start=None, end=None, multi_index=None, **kwargs): """Get instantaneous values data from NWIS and return it as a DataFrame. Note: If no start or end date are provided, only the most recent record is returned. @@ -430,12 +442,13 @@ def get_iv(start=None, end=None, **kwargs): """ start = kwargs.pop('startDT', start) end = kwargs.pop('endDT', end) - return _iv(startDT=start, endDT=end, **kwargs) + return _iv(startDT=start, endDT=end, multi_index=multi_index, **kwargs) def _iv(**kwargs): + mi = kwargs.pop('multi_index') response = query_waterservices('iv', format='json', **kwargs) - return _read_json(response.json()), _set_metadata(response, **kwargs) + return _read_json(response.json(), mi), _set_metadata(response, **kwargs) def get_pmcodes(parameterCd = 'All', partial = True): @@ -649,7 +662,7 @@ def get_record(sites=None, start=None, end=None, state=None, raise TypeError('{} service not yet implemented'.format(service)) -def _read_json(json, multi_index=False): +def _read_json(json, multi_index=None): """ Reads a NWIS Water Services formatted JSON into a DataFrame. @@ -714,7 +727,7 @@ def _read_json(json, multi_index=False): merged_df = update_merge(merged_df, record_df, na_only=True, on=['site_no', 'datetime']) - merged_df = format_response(merged_df) + merged_df = format_response(merged_df, multi_index) return merged_df