diff --git a/captoolkit/corrlaser.py.bak b/captoolkit/corrlaser.py.bak deleted file mode 100755 index 3c89376..0000000 --- a/captoolkit/corrlaser.py.bak +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python -""" -Compute and apply corrections for ICESat Laser 2 and 3. - -From Borsa et al. (2019): - Subtract 1.7 cm from Laser 2 and add 1.1 cm to Laser 3 - -Notes: - Edit some parameters bellow. - -Example: - python corrlaser.py /path/to/files/*.h5 - -Credits: - captoolkit - JPL Cryosphere Altimetry Processing Toolkit - - Fernando Paolo (paolofer@jpl.nasa.gov) - Johan Nilsson (johan.nilsson@jpl.nasa.gov) - Alex Gardner (alex.s.gardner@jpl.nasa.gov) - - Jet Propulsion Laboratory, California Institute of Technology - -""" -import os -import sys - -import h5py -import numpy as np -from astropy.time import Time -from future import standard_library - -standard_library.install_aliases() - -# === EDIT HERE ====================================== # - -# time variable -tvar = "t_year" - -# height variable -hvar = "h_cor" - -# apply or only store the correction for each height -apply_ = True - -# number of jobs in parallel -njobs = 16 - -# === END EDIT ======================================= # - -# Corrections for lasers/campaigns (in meters) -# Cor will be subtracted from height: h - l{1,2,3} -bias = {"l1": 0.0, "l2": 0.017, "l3": -0.011, None: 0.0} - -# Definition of ICESat campaigns/lasers -# https://nsidc.org/data/icesat/laser_op_periods.html -campaigns = [ - (Time("2003-02-20").decimalyear, Time("2003-03-21").decimalyear, "l1a"), - (Time("2003-03-21").decimalyear, Time("2003-03-29").decimalyear, "l1b"), - ( - Time("2003-09-25").decimalyear, - Time("2003-11-19").decimalyear, - "l2a", - ), # 8-d + 91-d + 91-d (NSIDC) - (Time("2004-02-17").decimalyear, Time("2004-03-21").decimalyear, "l2b"), - (Time("2004-05-18").decimalyear, Time("2004-06-21").decimalyear, "l2c"), - (Time("2004-10-03").decimalyear, Time("2004-11-08").decimalyear, "l3a"), - (Time("2005-02-17").decimalyear, Time("2005-03-24").decimalyear, "l3b"), - (Time("2005-05-20").decimalyear, Time("2005-06-23").decimalyear, "l3c"), - (Time("2005-10-21").decimalyear, Time("2005-11-24").decimalyear, "l3d"), - (Time("2006-02-22").decimalyear, Time("2006-03-28").decimalyear, "l3e"), - (Time("2006-05-24").decimalyear, Time("2006-06-26").decimalyear, "l3f"), - (Time("2006-10-25").decimalyear, Time("2006-11-27").decimalyear, "l3g"), - (Time("2007-03-12").decimalyear, Time("2007-04-14").decimalyear, "l3h"), - (Time("2007-10-02").decimalyear, Time("2007-11-05").decimalyear, "l3i"), - (Time("2008-02-17").decimalyear, Time("2008-03-21").decimalyear, "l3j"), - (Time("2008-10-04").decimalyear, Time("2008-10-19").decimalyear, "l3k"), - (Time("2008-11-25").decimalyear, Time("2008-12-17").decimalyear, "l2d"), - (Time("2009-03-09").decimalyear, Time("2009-04-11").decimalyear, "l2e"), - (Time("2009-09-30").decimalyear, Time("2009-10-11").decimalyear, "l2f"), -] - - -def _get_laser_bias(time, campaigns, bias): - """ Map time (yr) to campaign to correction. """ - camp = [ca for (t1, t2, ca) in campaigns if t1 <= time < t2] # ['c']|[] - laser = camp[0][:2] if camp else None - - return bias[laser] - - -get_laser_bias = np.vectorize(_get_laser_bias, excluded=[1, 2], cache=True) - - -def main(fname): - with h5py.File(fname, "a") as f: - t = f[tvar][:] - b = get_laser_bias(t, campaigns, bias) - - f["laser_bias"] = b # save bias - - if apply_: - f[hvar][:] -= b # remove bias - f.flush() - - os.rename(fname, fname.replace(".h5", "_LCOR.h5")) - - -files = sys.argv[1:] -print("Number of files:", len(files)) - - -if njobs == 1: - print("running sequential code ...") - [main(f) for f in files] -else: - print("running parallel code (%d jobs) ..." % njobs) - from joblib import Parallel, delayed - - Parallel(n_jobs=njobs, verbose=5)(delayed(main)(f) for f in files) - -print("done.") diff --git a/captoolkit/corrscatt.py.bak b/captoolkit/corrscatt.py.bak deleted file mode 100755 index bb163a3..0000000 --- a/captoolkit/corrscatt.py.bak +++ /dev/null @@ -1,1609 +0,0 @@ -#!/usr/bin/env python -""" -Corrects radar altimetry height to correlation with waveform parameters. - -Estimates the apparent height change originating from radar surface -scattering variations (which changes the waveform shape). - -Notes: - - This is a complex code; and it has been tailored to a specific use case. - - - The (back)scattering correction is applied as: - h_corr = h - h_bs - - - Edit parameters below. - -Example: - python scattcor.py -v lon lat h_res t_year -w bs lew tes -d 1 -r 4 \ - q 2 -p dif -f /path/to/*files.h5 - - python scattcor.py -h - -Credits: - captoolkit - JPL Cryosphere Altimetry Processing Toolkit - - Fernando Paolo (paolofer@jpl.nasa.gov) - Johan Nilsson (johan.nilsson@jpl.nasa.gov) - Alex Gardner (alex.s.gardner@jpl.nasa.gov) - - Jet Propulsion Laboratory, California Institute of Technology - -""" -import os -import h5py -import pyproj -import warnings -import argparse -import numpy as np -import statsmodels.api as sm -import matplotlib.pyplot as plt -from scipy.stats import mode -from scipy.spatial import cKDTree -from scipy.signal import savgol_filter - -from math import sqrt -from sklearn.metrics import mean_squared_error - -# --- Edit ------------------------------------------------------------ - -# OTE: This uses random cells, plot results, and do not save data -TEST_MODE = False - -# Use random locations -USE_SEED = True -N_CELLS = 20 -SEED = 222 - -# If True, uses given locations instead of random nodes (specified below) -USE_NODES = False - -# Specific locations for testing: Ross, Getz, PIG -NODES = [ - (-158.71, -78.7584), - # (-124.427, -74.4377), # Getz - # (-100.97, -75.1478), # PIG - (-158.40, -78.80), - (-178.40, -78.80), - (-188.00, -77.95), - (-160.00, -80.40), - (-178.40, -80.60), - (-190.40, -80.60), -] - -# Suffix for output file -SUFFIX1 = "_SCAT" -SUFFIX2 = "_SCATGRD" - -# Nave of variable to save bs-correction -H_BS = "h_bs" - -# Apply 3-month running median to processed time series -BIN_SERIES = True - -# Minimum correlation for each waveform param (NOT USED) -R_MIN = 0.1 - -# Minimum points per cell to compute solution -MIN_PTS = 50 - -# Minimum number of months to compute solution -MIN_MONTHS = 24 - -# Default time range -TMIN, TMAX = -9999, 9999 - -# Savitzky-Golay params for numerical diff -WINDOW = 15 -ORDER = 1 -DERIV = 1 - -# ---------------------------------------------------------------- - -# Supress anoying warnings -warnings.filterwarnings("ignore") - - -def get_args(): - """ Get command-line arguments. """ - msg = "Correct height data for surface scattering variation." - parser = argparse.ArgumentParser(description=msg) - parser.add_argument( - "-f", - metavar="file", - dest="files", - type=str, - nargs="+", - help="HDF5 file(s) to process", - required=True, - ) - parser.add_argument( - "-d", - metavar=("length"), - dest="dxy", - type=float, - nargs=1, - help=("block size of grid cell (km)"), - default=[None], - required=True, - ) - parser.add_argument( - "-r", - metavar=("radius"), - dest="radius", - type=float, - nargs=1, - help=("search radius (w/relocation) (km)"), - default=[0], - ) - parser.add_argument( - "-q", - metavar=("n_reloc"), - dest="nreloc", - type=int, - nargs=1, - help=("number of relocations for search radius"), - default=[0], - ) - parser.add_argument( - "-p", - metavar=None, - dest="proc", - type=str, - nargs=1, - help=("pre-process series for sensitivity estimation"), - choices=("det", "dif"), - default=[None], - ) - parser.add_argument( - "-v", - metavar=("lon", "lat", "h", "t"), - dest="vnames", - type=str, - nargs=4, - help=("name of x/y/z/t variables in the HDF5"), - default=[None], - required=True, - ) - parser.add_argument( - "-w", - metavar=("bs", "lew", "tes"), - dest="wnames", - type=str, - nargs=3, - help=("name of sig0/LeW/TeS parameters in HDF5"), - default=[None], - required=True, - ) - parser.add_argument( - "-j", - metavar=("epsg_num"), - dest="proj", - type=str, - nargs=1, - help=("EPSG proj number (AnIS=3031, GrIS=3413)"), - default=["3031"], - ) - parser.add_argument( - "-n", - metavar=("n_jobs"), - dest="njobs", - type=int, - nargs=1, - help="number of jobs for parallel processing", - default=[1], - ) - parser.add_argument( - "-t", - metavar=("tmin", "tmax"), - dest="tlim", - type=float, - nargs=2, - help="time interval to compute corrections (dec years)", - default=[TMIN, TMAX], - ) - parser.add_argument( - "-b", - metavar=("e", "w", "s", "n"), - dest="bbox", - type=float, - nargs=4, - help="full bbox in case of processing tiles for consistency", - default=[None], - ) - parser.add_argument( - "-a", - dest="apply", - action="store_true", - help=("apply correction to height in addition to saving"), - default=False, - ) - return parser.parse_args() - - -""" Generic functions """ - - -def binning( - x, - y, - xmin=None, - xmax=None, - dx=1 / 12.0, - window=3 / 12.0, - interp=False, - median=False, -): - """ - Time-series binning (w/overlapping windows). - - Args: - x,y: time and value of time series. - xmin,xmax: time span of returned binned series. - dx: time step of binning. - window: size of binning window. - interp: interpolate binned values to original x points. - """ - if xmin is None: - xmin = np.nanmin(x) - if xmax is None: - xmax = np.nanmax(x) - - steps = np.arange(xmin, xmax + dx, dx) - bins = [(ti, ti + window) for ti in steps] - - N = len(bins) - - yb = np.full(N, np.nan) - xb = np.full(N, np.nan) - eb = np.full(N, np.nan) - nb = np.full(N, np.nan) - sb = np.full(N, np.nan) - - for i in range(N): - - t1, t2 = bins[i] - (idx,) = np.where((x >= t1) & (x <= t2)) - - if len(idx) == 0: - continue - - ybv = y[idx] - # xbv = x[idx] - - if median: - yb[i] = np.nanmedian(ybv) - else: - yb[i] = np.nanmean(ybv) - - xb[i] = 0.5 * (t1 + t2) - eb[i] = mad_std(ybv) - nb[i] = np.sum(~np.isnan(ybv)) - sb[i] = np.sum(ybv) - - if interp: - yb = np.interp(x, xb, yb) - eb = np.interp(x, xb, eb) - sb = np.interp(x, xb, sb) - xb = x - - return xb, yb, eb, nb, sb - - -def transform_coord(proj1, proj2, x, y): - """ Transform coordinates from proj1 to proj2 (EPSG num). """ - # Set full EPSG projection strings - proj1 = pyproj.Proj("+init=EPSG:" + str(proj1)) - proj2 = pyproj.Proj("+init=EPSG:" + str(proj2)) - # Convert coordinates - return pyproj.transform(proj1, proj2, x, y) - - -def mad_std(x, axis=None): - """ Robust standard deviation (using MAD). """ - return 1.4826 * np.nanmedian(np.abs(x - np.nanmedian(x, axis)), axis) - - -def mode_filter(x, min_count=10, maxiter=3): - """Iterative mode filter. - - Remove values repeated 'min_count' times. - """ - n_iter = 0 - while n_iter < maxiter: - mod, count = mode(x[~np.isnan(x)]) - if len(count) < 1: - break - if count[0] > min_count: - x[x == mod[0]] = np.nan - n_iter += 1 - else: - n_iter = maxiter - return x - - -def median_filter(x, n_median=3): - """ Remove values greater than n * MAD-Std. """ - x[np.abs(x) > n_median * mad_std(x)] = np.nan - return x - - -def detrend_binned(x, y, order=1, window=3 / 12.0): - """Bin data (Med), compute trend (OLS) on binned, detrend original data.""" - x_b, y_b = binning(x, y, median=True, window=window, interp=False)[:2] - i_valid = ~np.isnan(y_b) - x_b, y_b = x_b[i_valid], y_b[i_valid] - try: - coef = np.polyfit(x_b, y_b, order) - y_trend = np.polyval(coef, x) # NOTE: Eval on full time - except IOError: - y_trend = np.zeros_like(y) - return y - y_trend, y_trend - - -def sigma_filter(x, y, order=1, window=3 / 12.0, n_iter=3, n_sigma=3): - """ Detrend (binned) data, remove 3 sigma from residual, repeat. """ - y_filt, y_res = y.copy(), y.copy() - for _ in range(n_iter): - y_res, y_trend = detrend_binned(x, y_res, order=order, window=window) - (idx,) = np.where(np.abs(y_res) > mad_std(y_res) * n_sigma) - if len(idx) == 0: - break # if no data to filter, stop iterating - y_res[idx] = np.nan - if np.sum(~np.isnan(y_res)) < 10: - break # NOTE: Arbitrary min obs - y_filt[np.isnan(y_res)] = np.nan - return y_filt - - -def detrend_binned2(x, y, window=3 / 12.0): - """ Bin data (Med), detrend original data with binned data. """ - x_b, y_b = binning(x, y, median=True, window=window, interp=True)[:2] - return y - y_b, y_b - - -def sigma_filter2(x, y, window=3 / 12.0, n_iter=3, n_sigma=3): - """ Bin data, remove binned, remove 3 sigma from residual, repeat. """ - y_filt, y_res = y.copy(), y.copy() - for _ in range(n_iter): - y_res, y_trend = detrend_binned2(x, y_res, window=window) - (idx,) = np.where(np.abs(y_res) > mad_std(y_res) * n_sigma) - if len(idx) == 0: - break # if no data to filter, stop iterating - y_res[idx] = np.nan - if sum(~np.isnan(y_res)) < 10: - break # NOTE: Arbitrary min obs - y_filt[np.isnan(y_res)] = np.nan - return y_filt - - -def center(*arrs): - """ Remove mean from array(s). """ - return [a - np.nanmean(a) for a in arrs] - - -def normalize(*arrs): - """ Normalize array(s) by std. """ - # return [a / np.nanstd(a, ddof=1) for a in arrs] - return [a / mad_std(a) for a in arrs] - - -def corr_coef(h, bs, lew, tes): - """ Get corr coef between h and w/f params. """ - (idx,) = np.where( - ~np.isnan(h) & ~np.isnan(bs) & ~np.isnan(lew) & ~np.isnan(tes) - ) - h_, bs_, lew_, tes_ = h[idx], bs[idx], lew[idx], tes[idx] - r_bs = np.corrcoef(bs_, h_)[0, 1] - r_lew = np.corrcoef(lew_, h_)[0, 1] - r_tes = np.corrcoef(tes_, h_)[0, 1] - return r_bs, r_lew, r_tes - - -def corr_grad(h, bs, lew, tes, normalize=False, robust=False): - """ Get corr gradient (slope) between h and w/f params. """ - (idx,) = np.where( - ~np.isnan(h) & ~np.isnan(bs) & ~np.isnan(lew) & ~np.isnan(tes) - ) - if len(idx) < 3: - return np.nan, np.nan, np.nan - h_, bs_, lew_, tes_ = h[idx], bs[idx], lew[idx], tes[idx] - - try: - if robust: - # Robust line fit - s_bs = linefit(bs_, h_, return_coef=True)[0] - s_lew = linefit(lew_, h_, return_coef=True)[0] - s_tes = linefit(tes_, h_, return_coef=True)[0] - else: - # OLS line fit - s_bs = np.polyfit(bs_, h_, 1)[0] - s_lew = np.polyfit(lew_, h_, 1)[0] - s_tes = np.polyfit(tes_, h_, 1)[0] - - if normalize: - s_bs /= mad_std(bs_) - s_lew /= mad_std(lew_) - s_tes /= mad_std(tes_) - except IOError: - return np.nan, np.nan, np.nan - - return s_bs, s_lew, s_tes - - -def linefit(x, y, return_coef=False): - """ - Fit a straight-line by robust regression (M-estimate: Huber, 1981). - - If `return_coef=True` returns the slope (m) and intercept (c). - """ - assert sum(~np.isnan(y)) > 1 - - X = sm.add_constant(x, prepend=False) - y_fit = sm.RLM(y, X, M=sm.robust.norms.HuberT(), missing="drop").fit( - maxiter=1, tol=0.001 - ) - - x_fit = x[~np.isnan(y)] - - if return_coef: - if len(y_fit.params) < 2: - return y_fit.params[0], 0.0 - else: - return y_fit.params[:] - else: - return x_fit, y_fit.fittedvalues - - -def is_empty(ifile): - """If file is empty/corruted, return True.""" - try: - with h5py.File(ifile, "r") as f: - return not bool(list(f.keys())) - except IOError: - return True - - -""" Helper functions """ - - -def filter_data(t, h, bs, lew, tes, n_sigma=3, window=3 / 12.0): - """ - Use various filters to remove outliers. - - Replaces outliers with NaNs. - """ - # Iterative mode filter (for repeated values) - h = mode_filter(h, min_count=10, maxiter=3) - bs = mode_filter(bs, min_count=10, maxiter=3) - lew = mode_filter(lew, min_count=10, maxiter=3) - tes = mode_filter(tes, min_count=10, maxiter=3) - if 1: - # Iterative n-sigma filter (w.r.t. the OLS trend) - h = sigma_filter(t, h, order=2, n_sigma=n_sigma, n_iter=3) - bs = sigma_filter(t, bs, order=2, n_sigma=n_sigma, n_iter=3) - lew = sigma_filter(t, lew, order=2, n_sigma=n_sigma, n_iter=3) - tes = sigma_filter(t, tes, order=2, n_sigma=n_sigma, n_iter=3) - else: - # Iterative n-sigma filter (w.r.t. the Med-binned) - h = sigma_filter2(t, h, window=window, n_sigma=n_sigma, n_iter=3) - bs = sigma_filter2(t, bs, window=window, n_sigma=n_sigma, n_iter=3) - lew = sigma_filter2(t, lew, window=window, n_sigma=n_sigma, n_iter=3) - tes = sigma_filter2(t, tes, window=window, n_sigma=n_sigma, n_iter=3) - return t, h, bs, lew, tes - - -def interp_params(t, h, bs, lew, tes): - """ - Interpolate waveform params based on height-series valid entries. - - See also: - interp_params2() - """ - params = [bs, lew, tes] - - # Find the number of valid entries - npts = [sum(~np.isnan(x)) for x in params] - - # Determine all the entries that should have valid data - isvalid = ~np.isnan(h) - n_valid = sum(isvalid) - - # Do nothing if params are empty or have the same valid entries - if np.all(npts == n_valid): - return params - - # Sort indices for interpolation - i_sort = np.argsort(t) - - for k, (n_p, p) in enumerate(zip(npts, [bs, lew, tes])): - - if n_p == n_valid: - continue - - # Get the points that should be interpolated (if any) - (i_interp,) = np.where(np.isnan(p) & isvalid) - - """ - print 'full length: ', len(p) - print 'max valid entries:', n_valid - print 'valid entries: ', sum(~np.isnan(p)) - print 'interp entries: ', len(i_interp) - print t[i_interp] - """ - - # Get sorted and clean (w/o NaNs) series to interpolate - tt, pp = t[i_sort], p[i_sort] - (i_valid,) = np.where(~np.isnan(pp)) - tt, pp = tt[i_valid], pp[i_valid] - - p_interp = np.interp(t[i_interp], tt, pp) - - p[i_interp] = p_interp - - params[k] = p - - return params - - -def get_bboxs(lon, lat, dxy, proj="3031"): - """ Define cells (bbox) for estimating corrections. """ - - # Convert into sterographic coordinates - x, y = transform_coord("4326", proj, lon, lat) - - # Number of tile edges on each dimension - Nns = int(np.abs(np.nanmax(y) - np.nanmin(y)) / dxy) + 1 - New = int(np.abs(np.nanmax(x) - np.nanmin(x)) / dxy) + 1 - - # Coord of tile edges for each dimension - xg = np.linspace(x.min(), x.max(), New) - yg = np.linspace(y.min(), y.max(), Nns) - - # Vector of bbox for each cell - bboxs = [ - (w, e, s, n) - for w, e in zip(xg[:-1], xg[1:]) - for s, n in zip(yg[:-1], yg[1:]) - ] - del xg, yg - - # print 'total grid cells:', len(bboxs) - return bboxs - - -def make_grid(xmin, xmax, ymin, ymax, dx, dy): - """ Construct output grid-coordinates. """ - Nn = int((np.abs(ymax - ymin)) / dy) + 1 # ny - Ne = int((np.abs(xmax - xmin)) / dx) + 1 # nx - xi = np.linspace(xmin, xmax, num=Ne) - yi = np.linspace(ymin, ymax, num=Nn) - return np.meshgrid(xi, yi) - - -def get_cell_idx(lon, lat, bbox, proj=3031): - """ Get indexes of all data points inside cell. """ - - # Bounding box of grid cell - xmin, xmax, ymin, ymax = bbox - - # Convert lon/lat to sterographic coordinates - x, y = transform_coord(4326, proj, lon, lat) - - # Get the sub-tile (grid-cell) indices - (i_cell,) = np.where((x >= xmin) & (x <= xmax) & (y >= ymin) & (y <= ymax)) - - return i_cell - - -def get_radius_idx( - x, y, x0, y0, r, Tree, n_reloc=0 -): # NOTE: Add min reloc dist????? - """ Get indices of all data points inside radius. """ - - # Query the Tree from the node - idx = Tree.query_ball_point((x0, y0), r) - - # print 'query #: 1 ( first search )' - - # Either no relocation or not enough points to do relocation - if n_reloc < 1 or len(idx) < 2: - return idx - - # Relocate center of search radius and query again - for k in range(n_reloc): - - # Compute new search location => relocate initial center - x0_new, y0_new = np.median(x[idx]), np.median(y[idx]) - - # Compute relocation distance - reloc_dist = np.hypot(x0_new - x0, y0_new - y0) - - # Do not allow total relocation to be larger than the search radius - if reloc_dist > r: - break - - # print 'query #:', k+2, '( reloc #:', k+1, ')' - # print 'relocation dist:', reloc_dist - - # Query from the new location - idx = Tree.query_ball_point((x0_new, y0_new), r) - - # If max number of relocations reached, exit - if n_reloc == k + 1: - break - - return idx - - -def multi_fit_coef(t_, h_, bs_, lew_, tes_): - """ - Calculate scattering correction for height time series. - - The correction is given as a linear combination (multivariate fit) of - waveform parameters. The correction time series is obtain in two steps: - - 1) Fit the coefficients to the differenced/detrended series: - - det[h](t) = a det[Bs](t) + b det[LeW](t) + c det[TeS](t) - - 2) Linear combination of original series using fitted coeffs: - - h_cor(t) = a Bs(t) + b LeW(t) + c TeS(t) - - where a, b, c are the "sensitivities" (or weights, or scaling factors) - for each waveform-parameter time series. - - Args: - t: time - h: height change (residuals from mean topo) - bs: backscatter coefficient - lew: leading-edge width - tes: trailing-edge slope - - """ - # Ensure zero mean of processed series - h_, bs_, lew_, tes_ = center(h_, bs_, lew_, tes_) - - # Construct design matrix: First-order model - A_ = np.vstack((bs_, lew_, tes_)).T - - # Check for division by zero - try: - - # Fit robust linear model on differenced series (w/o NaNs) - model = sm.OLS(h_, A_, missing="drop").fit(method="qr") - - # print model.summary() - - # Get multivar coefficients for Bs, LeW, TeS - a, b, c = model.params[:3] - - # Get adjusted r-squared -> model performance metric - # (adjusted for the model degrees of freedom; 3 in this case) - r2 = model.rsquared_adj - - # Get p-value of F-statistics -> significance of overall fit - # (F-test assesses multiple coefficients simultaneously) - pval = model.f_pvalue - - # Get p-value of t-statistics -> significance of each coef - # (t-test assesses each model coefficient individually) - pvals = model.pvalues - - # Set all params to zero if exception detected - except IOError: - - print("MULTIVARIATE FIT FAILED, setting params -> 0") - print("VALID DATA POINTS in h:", sum(~np.isnan(h_))) - - a, b, c, r2, pval, pvals = 0, 0, 0, 0, 1e3, [1e3, 1e3, 1e3] - - return [a, b, c, r2, pval, pvals] - - -def rmse(t, x1, x2, order=1): - """ RMSE between (detrended) x1 and x2. """ - x1_res = detrend_binned(t, x1, order=order)[0] # use OLS poly fit - x2_res = detrend_binned(t, x2, order=order)[0] - ii = ~np.isnan(x1_res) & ~np.isnan(x2_res) - x1_res, x2_res = x1_res[ii], x2_res[ii] - return sqrt(mean_squared_error(x1_res, x2_res)) - - -def std_change(t, x1, x2, order=1): - """Variance change from (detrended) x1 to x2 (magnitude and percentage)""" - idx = ~np.isnan(x1) & ~np.isnan(x2) - if sum(idx) < 3: - return np.nan, np.nan - x1_res = detrend_binned(t, x1, order=order)[0] # use OLS poly fit - x2_res = detrend_binned(t, x2, order=order)[0] - s1 = mad_std(x1_res) - s2 = mad_std(x2_res) - delta_s = s2 - s1 - return delta_s, delta_s / s1 - - -def trend_change(t, x1, x2): - """ Linear-trend change from x1 to x2 (magnitude and percentage). """ - idx = ~np.isnan(x1) & ~np.isnan(x2) - if sum(idx) < 3: - return np.nan, np.nan - t_, x1_, x2_ = t[idx], x1[idx], x2[idx] - x1_ -= x1_.mean() - x2_ -= x2_.mean() - a1 = np.polyfit(t_, x1_, 1)[0] # use OLS poly fit - a2 = np.polyfit(t_, x2_, 1)[0] - delta_a = a2 - a1 - return delta_a, delta_a / np.abs(a1) - - -def sgolay1d(h, window=3, order=1, deriv=0, dt=1.0, mode="nearest", time=None): - """Savitztky-Golay filter with support for NaNs - - If time is given, interpolate NaNs otherwise pad w/zeros. - - dt is spacing between samples. - """ - h2 = h.copy() - (ii,) = np.where(np.isnan(h2)) - (jj,) = np.where(np.isfinite(h2)) - if len(ii) > 0 and time is not None: - h2[ii] = np.interp(time[ii], time[jj], h2[jj]) - elif len(ii) > 0: - h2[ii] = 0 - else: - pass - h2 = savgol_filter(h2, window, order, deriv, delta=dt, mode=mode) - return h2 - - -def overlap(x1, x2, y1, y2): - """ Return True if x1-x2/y1-y2 ranges overlap. """ - return (x2 >= y1) & (y2 >= x1) - - -def intersect(x1, x2, y1, y2, a1, a2, b1, b2): - """ Return True if two (x1,x2,y1,y2) rectangles intersect. """ - return overlap(x1, x2, a1, a2) & overlap(y1, y2, b1, b2) - - -def plot( - x, - y, - xc, - yc, - tc, - hc, - bc, - wc, - sc, - hc_, - bc_, - wc_, - sc_, - hc_cor, - h_bs, - r_bc, - r_wc, - r_sc, - s_bc, - s_wc, - s_sc, - d_std, - p_std, - d_trend, - p_trend, - r2, - pval, - pvals, -): - - tc_ = tc.copy() - - # Bin variables - hc_b = binning(tc, hc_cor, median=True, window=3 / 12.0)[ - 1 - ] # NOTE: If binning before, this is binning over a binning - bc_b = binning(tc, bc, median=True, window=3 / 12.0)[1] - wc_b = binning(tc, wc, median=True, window=3 / 12.0)[1] - tc_b, sc_b = binning(tc, sc, median=True, window=3 / 12.0)[:2] - - # Compute trends for plot - (ii,) = np.where(~np.isnan(tc) & ~np.isnan(hc) & ~np.isnan(hc_cor)) - t_, h1_, h2_ = tc[ii], hc[ii], hc_cor[ii] - coefs1 = np.polyfit(t_, h1_, 1) - coefs2 = np.polyfit(t_, h2_, 1) - trend1 = np.polyval(coefs1, tc) - trend2 = np.polyval(coefs2, tc) - - # Mask NaNs for plotting - mask = np.isfinite(hc_b) - - # Default color cycle - cmap = plt.get_cmap("tab10") - - plt.figure(figsize=(6, 8)) - - plt.subplot(4, 1, 1) - plt.plot(tc, hc, ".") - plt.plot(tc, hc_cor, ".") - plt.plot(tc_b[mask], hc_b[mask], "-", color=cmap(3), linewidth=2) - plt.plot(tc, trend1, "-", color=cmap(0), linewidth=1.5) - plt.plot(tc, trend2, "-", color=cmap(3), linewidth=1.5) - plt.ylabel("Height (m)") - plt.title("Original time series") - - plt.subplot(4, 1, 2) - plt.plot(tc, bc, ".") - plt.plot(tc_b[mask], bc_b[mask], "-", linewidth=2) - plt.ylabel("Bs (s.d.)") - - plt.subplot(4, 1, 3) - plt.plot(tc, wc, ".") - plt.plot(tc_b[mask], wc_b[mask], "-", linewidth=2) - plt.ylabel("LeW (s.d.)") - - plt.subplot(4, 1, 4) - plt.plot(tc, sc, ".") - plt.plot(tc_b[mask], sc_b[mask], "-", linewidth=2) - plt.ylabel("TeS (s.d.)") - - # Bin variables - hc_b_ = binning(tc_, hc_, median=False, window=3 / 12.0)[ - 1 - ] # NOTE: If original TS were binned, - bc_b_ = binning(tc_, bc_, median=False, window=3 / 12.0)[ - 1 - ] # this is binning over a binning - wc_b_ = binning(tc_, wc_, median=False, window=3 / 12.0)[1] - tc_b_, sc_b_ = binning(tc_, sc_, median=True, window=3 / 12.0)[:2] - - # Fit seasonality - plot_season = True - if plot_season: - - from scipy import optimize - - # Trend + Seasonal model - def func(t, c, m, n, a, p): - """ Seasonality with amplitude and phase. """ - # return a * np.sin(2*np.pi * t + p) - return c + m * t + n * t ** 2 + a * np.sin(2 * np.pi * t + p) - - # Fit seasonality - ii = ~np.isnan(tc_) & ~np.isnan(hc_) - t_season, h_season = tc_[ii], hc_[ii] - params, params_covariance = optimize.curve_fit( - func, t_season, h_season, p0=[0.0, 0.0, 0.0, 0.1, 0.1] - ) - a0, a1, a2, amp, pha = params - t_season = np.linspace(t_season.min(), t_season.max(), 100) - h_season = func(t_season, a0, a1, a2, amp, pha) - - # mask NaNs for plotting - mask = np.isfinite(hc_b_) - - plt.figure(figsize=(6, 8)) - - plt.subplot(4, 1, 1) - plt.plot(tc_, hc_, ".") - plt.plot(tc_b_[mask], hc_b_[mask], "-") - if plot_season: - plt.plot(t_season, h_season, "-r") - plt.ylabel("Height (m)") - plt.title("Processed time series") - if plot_season: - plt.title("Amplitude = %.2f, Phase = %.2f" % (amp, pha)) - - plt.subplot(4, 1, 2) - plt.plot(tc_, bc_, ".") - plt.plot(tc_b_[mask], bc_b_[mask], "-") - plt.ylabel("Bs (s.d.)") - - plt.subplot(4, 1, 3) - plt.plot(tc_, wc_, ".") - plt.plot(tc_b_[mask], wc_b_[mask], "-") - plt.ylabel("LeW (s.d.)") - - plt.subplot(4, 1, 4) - plt.plot(tc_, sc_, ".") - plt.plot(tc_b_[mask], sc_b_[mask], "-") - plt.ylabel("TeS (s.d.)") - - plt.figure() - plt.plot(x, y, ".", color="0.6", zorder=1) - plt.scatter(xc, yc, c=hc_cor, s=5, vmin=-1, vmax=1, zorder=2) - plt.plot(np.nanmedian(xc), np.nanmedian(yc), "o", color="red", zorder=3) - plt.title("Tracks") - - # Plot Spectrum - if 1: - - tc, hc, bc, wc, sc = tc_, hc_, bc_, wc_, sc_ # processed TS - - from astropy.stats import LombScargle - - periods = np.arange(3 / 12.0, 1.5, 0.01) - freq = 1 / periods - - hc[np.isnan(hc)] = 0.0 - bc[np.isnan(bc)] = 0.0 - wc[np.isnan(wc)] = 0.0 - sc[np.isnan(sc)] = 0.0 - - ls = LombScargle(tc, hc, nterms=1) - power_h = ls.power(freq) - ls = LombScargle(tc, bc, nterms=1) - power_b = ls.power(freq) - ls = LombScargle(tc, wc, nterms=1) - power_w = ls.power(freq) - ls = LombScargle(tc, sc, nterms=1) - power_s = ls.power(freq) - - plt.figure(figsize=(6, 8)) - - plt.subplot(4, 1, 1) - plt.plot(freq, power_h, linewidth=2) - plt.xlabel("Frequency (cycles/year)") - plt.ylabel("Power (1/RMSE)") - plt.title("Spectrum of processed time series") - - plt.subplot(4, 1, 2) - plt.plot(freq, power_b, linewidth=2) - plt.xlabel("Frequency (cycles/year)") - plt.ylabel("Power (1/RMSE)") - - plt.subplot(4, 1, 3) - plt.plot(freq, power_w, linewidth=2) - plt.xlabel("Frequency (cycles/year)") - plt.ylabel("Power (1/RMSE)") - - plt.subplot(4, 1, 4) - plt.plot(freq, power_s, linewidth=2) - plt.xlabel("Frequency (cycles/year)") - plt.ylabel("Power (1/RMSE)") - - # Plot Crosscorrelation - if 1: - - tc, hc, bc, wc, sc = tc_, hc_, bc_, wc_, sc_ # processed TS - - plt.figure(figsize=(6, 8)) - - plt.subplot(311) - plt.xcorr(hc, bc) - plt.ylabel("h x bs") - plt.title("Crosscorrelation") - - plt.subplot(312) - plt.xcorr(hc, wc) - plt.ylabel("h x LeW") - - plt.subplot(313) - plt.xcorr(hc, sc) - plt.ylabel("h x TeS") - - tc, hc, bc, wc, sc = tc_, hc_, bc_, wc_, sc_ # processed TS - - plt.figure(figsize=(3, 9)) - - plt.subplot(311) - plt.plot(bc, hc, ".") - # plt.title('Correlation Bs x h (%s)' % str(proc)) - plt.xlabel("Bs (s.d.)") - plt.ylabel("h (m)") - plt.title("Correlation of processed time series") - - plt.subplot(312) - plt.plot(wc, hc, ".") - # plt.title('Correlation Bs x h (%s)' % str(proc)) - plt.xlabel("LeW (s.d.)") - plt.ylabel("h (m)") - - plt.subplot(313) - plt.plot(sc, hc, ".") - # plt.title('Correlation Bs x h (%s)' % str(proc)) - plt.xlabel("TeS (s.d.)") - plt.ylabel("h (m)") - - print("Summary:") - print("--------") - print("cor applied: ", (h_bs[~np.isnan(h_bs)] != 0).any()) - print( - "std change: %.3f m (%.1f %%)" - % (round(d_std, 3), round(p_std * 100, 1)) - ) - print( - "trend change: %.3f m/yr (%.1f %%)" - % (round(d_trend, 3), round(p_trend * 100, 1)) - ) - print("") - print("r-squared: ", round(r2, 3)) - print("p-value: ", round(pval, 3)) - print("p-values: ", [round(p, 3) for p in pvals]) - print("") - print("r_bs: ", round(r_bc, 3)) - print("r_lew: ", round(r_wc, 3)) - print("r_tes: ", round(r_sc, 3)) - print("") - print("s_bs: ", round(s_bc, 3)) - print("s_lew: ", round(s_wc, 3)) - print("s_tes: ", round(s_sc, 3)) - - plt.show() - - -def main( - ifile, - vnames, - wnames, - dxy, - proj, - radius=0, - n_reloc=0, - proc=None, - apply_=False, -): - - if is_empty(ifile): - print("empty file... skipping!!!") - return - - if TEST_MODE: - print("*********************************************************") - print("* RUNNING IN TEST MODE (PLOTTING ONLY, NOT SAVING DATA) *") - print("*********************************************************") - - print("processing file:", ifile, "...") - - # Test if parameter file exists - if "_scatgrd" in ifile.lower(): - return - - xvar, yvar, zvar, tvar = vnames - bpar, wpar, spar = wnames - - # Load full data into memory (only once) - with h5py.File(ifile, "r") as fi: - - t = fi[tvar][:] - h = fi[zvar][:] - lon = fi[xvar][:] - lat = fi[yvar][:] - bs = fi[bpar][:] - lew = fi[wpar][:] - tes = fi[spar][:] - - # Convert into sterographic coordinates - x, y = transform_coord("4326", proj, lon, lat) - - # Get bbox from data - xmin_d, xmax_d, ymin_d, ymax_d = x.min(), x.max(), y.min(), y.max() - - # If no bbox given, limits are defined by data - if bbox[0] is None: - xmin, xmax, ymin, ymax = xmin_d, xmax_d, ymin_d, ymax_d - else: - xmin, xmax, ymin, ymax = bbox - - # Grid solution - defined by nodes - Xi, Yi = make_grid(xmin, xmax, ymin, ymax, dxy, dxy) - - # Flatten prediction grid - x_nodes, y_nodes = Xi.ravel(), Yi.ravel() - - """ Create output containers """ - - N_data = len(x) - N_nodes = len(x_nodes) - - # Values for each data point - r2fit = np.full(N_data, 0.0) # r2 of the multivar fit - pval = np.full(N_data, np.nan) # r2 of the multivar fit - dstd = np.full(N_data, np.nan) # magnitude std change after cor - dtrend = np.full(N_data, np.nan) # magnitude trend change after cor - pstd = np.full( - N_data, np.inf - ) # perc std change after cor ##NOTE: Init w/inf - ptrend = np.full(N_data, np.nan) # perc trend change after cor - hbs = np.full(N_data, np.nan) # scatt cor from multivar fit - rbs = np.full(N_data, np.nan) # corr coef h x Bs - rlew = np.full(N_data, np.nan) # corr coef h x LeW - rtes = np.full(N_data, np.nan) # corr coef h x TeS - sbs = np.full(N_data, np.nan) # sensit h x Bs - slew = np.full(N_data, np.nan) # sensit h x LeW - stes = np.full(N_data, np.nan) # sensit h x TeS - bbs = np.full(N_data, np.nan) # multivar fit coef a.Bs - blew = np.full(N_data, np.nan) # multivar fit coef b.LeW - btes = np.full(N_data, np.nan) # multivar fit coef c.TeS - - # Values for each node - r2fitc = np.full(N_nodes, 0.0) - dstdc = np.full(N_nodes, np.nan) - dtrendc = np.full(N_nodes, np.nan) - pstdc = np.full(N_nodes, np.inf) # NOTE: Init w/inf - ptrendc = np.full(N_nodes, np.nan) - rbsc = np.full(N_nodes, np.nan) - rlewc = np.full(N_nodes, np.nan) - rtesc = np.full(N_nodes, np.nan) - sbsc = np.full(N_nodes, np.nan) - slewc = np.full(N_nodes, np.nan) - stesc = np.full(N_nodes, np.nan) - bbsc = np.full(N_nodes, np.nan) - blewc = np.full(N_nodes, np.nan) - btesc = np.full(N_nodes, np.nan) - lonc = np.full(N_nodes, np.nan) - latc = np.full(N_nodes, np.nan) - - # Select cells at random (for testing) - if TEST_MODE: - if USE_NODES: - # Convert into sterographic coordinates - x_nodes = [ - transform_coord("4326", "3031", xp, yp)[0] for xp, yp in NODES - ] - y_nodes = [ - transform_coord("4326", "3031", xp, yp)[1] for xp, yp in NODES - ] - else: - if USE_SEED: - np.random.seed(SEED) # not so random! - # Select a few random nodes - ii = np.random.randint(0, N_nodes, N_CELLS) - x_nodes, y_nodes = x_nodes[ii], y_nodes[ii] - N_nodes = len(x_nodes) - - # Build KD-Tree with polar stereo coords - x, y = transform_coord(4326, proj, lon, lat) - Tree = cKDTree(list(zip(x, y))) - - # Loop through nodes - for k in range(N_nodes): - - if (k % 500) == 0: - print("Calculating correction for node", k, "of", N_nodes, "...") - - x0, y0 = x_nodes[k], y_nodes[k] - - # If search radius doesn't contain data, skip - if not intersect( - x0 - radius, - x0 + radius, - y0 - radius, - y0 + radius, - xmin_d, - xmax_d, - ymin_d, - ymax_d, - ): - continue - - # Get indices of data within search radius - i_cell = get_radius_idx(x, y, x0, y0, radius, Tree, n_reloc=n_reloc) - - # If cell empty or not enough data go to next node - if len(i_cell) < MIN_PTS: - continue - - # Get all data within the grid search radius - tc = t[i_cell] - hc = h[i_cell] - xc = x[i_cell] - yc = y[i_cell] - bc = bs[i_cell] - wc = lew[i_cell] - sc = tes[i_cell] - - # Keep original (unfiltered) data - tc_orig, hc_orig = tc.copy(), hc.copy() - - PLOT_INTERP = False - if PLOT_INTERP: - bc0, wc0, sc0 = ( - bc.copy(), - wc.copy(), - sc.copy(), - ) # NOTE: for plotting - - # Filter invalid points - tc, hc, bc, wc, sc = filter_data( - tc, hc, bc, wc, sc, n_sigma=3, window=3 / 12.0 - ) - - if PLOT_INTERP: - bc1, wc1, sc1 = ( - bc.copy(), - wc.copy(), - sc.copy(), - ) # NOTE: for plotting - - # Test minimum number of obs in all params - nobs = min([len(v[~np.isnan(v)]) for v in [hc, bc, wc, sc]]) - - # Test for enough points - if nobs < MIN_PTS: - continue - - # Bin at monthly intervals to check temporal sampling - h_bin = binning(tc, hc, dx=1 / 12.0, window=3 / 12.0, interp=False)[1] - if sum(~np.isnan(h_bin)) < MIN_MONTHS: - continue - - # Interpolate missing w/f params based on h series - # bc, wc, sc = interp_params(tc, hc, bc, wc, sc) - # NOTE: Leave this out for now (only use valid entries in ALL series) - - if PLOT_INTERP: - bc2, wc2, sc2 = ( - bc.copy(), - wc.copy(), - sc.copy(), - ) # NOTE: for plotting - - # Plot interpolated time-series points - if PLOT_INTERP: - - bc2[bc2 == bc1] = np.nan - wc2[wc2 == wc1] = np.nan - sc2[sc2 == sc1] = np.nan - - plt.subplot(411) - plt.plot(tc, hc_orig, ".") - plt.plot(tc, hc, ".") - plt.subplot(412) - plt.plot(tc, bc0, ".") - plt.plot(tc, bc1, ".") - plt.plot(tc, bc2, "x") - plt.subplot(413) - plt.plot(tc, wc0, ".") - plt.plot(tc, wc1, ".") - plt.plot(tc, wc2, "x") - plt.subplot(414) - plt.plot(tc, sc0, ".") - plt.plot(tc, sc1, ".") - plt.plot(tc, sc2, "x") - - plt.figure() - plt.plot(x, y, ".", color=".5", rasterized=True) - plt.plot(xc, yc, ".") - plt.plot(xc[~np.isnan(hc)], yc[~np.isnan(hc)], ".") - - plt.show() - continue - - if BIN_SERIES: - hc_bin = binning( - tc, hc, median=True, window=3 / 12.0, interp=True - )[1] - bc_bin = binning( - tc, bc, median=True, window=3 / 12.0, interp=True - )[1] - wc_bin = binning( - tc, wc, median=True, window=3 / 12.0, interp=True - )[1] - sc_bin = binning( - tc, sc, median=True, window=3 / 12.0, interp=True - )[1] - else: - hc_bin = hc - bc_bin = bc - wc_bin = wc - sc_bin = sc - - # Ensure zero mean on all variables - hc, bc, wc, sc = center(hc, bc, wc, sc) - hc_bin, bc_bin, wc_bin, sc_bin = center(hc_bin, bc_bin, wc_bin, sc_bin) - - # Normalize the w/f params to std = 1 - bc, wc, sc = normalize(bc, wc, sc) - bc_bin, wc_bin, sc_bin = normalize(bc_bin, wc_bin, sc_bin) - - if proc == "det": - # Detrend time series - hc_res = detrend_binned(tc, hc_bin, order=2)[0] - bc_res = detrend_binned(tc, bc_bin, order=2)[0] - wc_res = detrend_binned(tc, wc_bin, order=2)[0] - sc_res = detrend_binned(tc, sc_bin, order=2)[0] - else: - # Savitzky-Golay numerical diff - hc_res = sgolay1d( - hc_bin, WINDOW, ORDER, DERIV - ) # FIXME: Think what dt to use here (uneven scattered points) - bc_res = sgolay1d(bc_bin, WINDOW, ORDER, DERIV) - wc_res = sgolay1d(wc_bin, WINDOW, ORDER, DERIV) - sc_res = sgolay1d(sc_bin, WINDOW, ORDER, DERIV) - - # Get coefs from multivariate fit - b_bc, b_wc, b_sc, r2, pval, pvals = multi_fit_coef( - tc, hc_res, bc_res, wc_res, sc_res - ) - - if sum([b_bc, b_wc, b_sc]) == 0: - continue - - # Get linear combination of original FILTERED series - # h_bs = a Bs + b LeW + c TeS - hc_bs = np.dot(np.vstack((bc, wc, sc)).T, [b_bc, b_wc, b_sc]) - - # NOTE 1: The correction is generated using the original w/f series - # NOTE 2: Use np.dot to preseved NaNs from original (filtered) series - - if np.isnan(hc_bs).all(): - continue - - # Apply correction to height - hc_cor = hc - hc_bs - - # Calculate correlation between h and waveform params - r_bc, r_wc, r_sc = corr_coef(hc_res, bc_res, wc_res, sc_res) - - # Calculate sensitivity values (corr grad) - s_bc, s_wc, s_sc = corr_grad( - hc_res, bc_res, wc_res, sc_res, normalize=False - ) - - # Calculate variance change (magnitude and perc) - d_std, p_std = std_change(tc, hc, hc_cor, order=1) - - # Calculate trend change (magnitude and perc) - d_trend, p_trend = trend_change(tc, hc, hc_cor) - - if np.isnan([d_std, p_std, d_trend, p_std]).any(): - continue - - # Calculate RMSE between detrended series - # d_rmse = rmse(tc, hc, hc_cor, order=1) - - # Test if at least one correlation is significant - # r_cond = (np.abs(r_bc) < R_MIN and np.abs(r_wc) \ - # < R_MIN and np.abs(r_sc) < R_MIN) - - # NOTE: We are ignoring the 'pval' (significance of fit). - # NOTE: All we care about is the reduction in variance. - - # Do not apply correction if: std increases by more than 5% - if p_std > 0.05: - - # Cor is set to zero (keeping NaNs) - hc_cor = hc.copy() # back to original - hc_bs[ - ~np.isnan(hc_bs) - ] = 0.0 # hc_bs keeps NaNs from filtered out values - - """ - # All params are set to zero/one - b_bc, b_wc, b_sc = 0., 0., 0. - r_bc, r_wc, r_sc = 0., 0., 0. - s_bc, s_wc, s_sc = 0., 0., 0. - r2, pval, pvals = 0., 1e3, (1e3, 1e3, 1e3) - d_std, p_std, d_trend, p_trend = 0., 0., 0., 0. - """ - - # Plot individual grid cells for testing - if TEST_MODE: - plt.figure(figsize=(6, 2)) - plt.plot(tc_orig, hc_orig, ".", color="0.3") - plot( - x, - y, - xc, - yc, - tc, - hc, - bc, - wc, - sc, - hc_res, - bc_res, - wc_res, - sc_res, - hc_cor, - hc_bs, - r_bc, - r_wc, - r_sc, - s_bc, - s_wc, - s_sc, - d_std, - p_std, - d_trend, - p_trend, - r2, - pval, - pvals, - ) - - """ Store results (while checking previously stored estimates) """ - - # Check where previously stored values need update - # r2_prev <= r2_new (new r2 is larger) - # i_update, = np.where(r2fit[i_cell] <= r2) - (i_update,) = np.where( - pstd[i_cell] > p_std - ) # std_prev > std_new (new std is lower) - - # Remove indices and values that will not be updated - i_cell_new = [i_cell[i] for i in i_update] - hc_bs_new = hc_bs[i_update] - - # Store correction for cell: only the improved values - hbs[i_cell_new] = hc_bs_new # set of values - r2fit[i_cell_new] = r2 # one value (same for all) - dstd[i_cell_new] = d_std - pstd[i_cell_new] = p_std - dtrend[i_cell_new] = d_trend - ptrend[i_cell_new] = p_trend - rbs[i_cell_new] = r_bc # corr coef - rlew[i_cell_new] = r_wc - rtes[i_cell_new] = r_sc - sbs[i_cell_new] = s_bc # sensitivity - slew[i_cell_new] = s_wc - stes[i_cell_new] = s_sc - bbs[i_cell_new] = b_bc # multivar fit coef - blew[i_cell_new] = b_wc - btes[i_cell_new] = b_sc - - # Compute centroid of cell - xc_ = np.nanmedian(xc) - yc_ = np.nanmedian(yc) - - # Convert x/y -> lon/lat - lon_c, lat_c = transform_coord(proj, 4326, xc_, yc_) - - # Store one s and r value per cell - lonc[k] = lon_c - latc[k] = lat_c - r2fitc[k] = r2 - dstdc[k] = d_std - pstdc[k] = p_std - dtrendc[k] = d_trend - ptrendc[k] = p_trend - rbsc[k] = r_bc - rlewc[k] = r_wc - rtesc[k] = r_sc - sbsc[k] = s_bc - slewc[k] = s_wc - stesc[k] = s_sc - bbsc[k] = b_bc - blewc[k] = b_wc - btesc[k] = b_sc - - """ Correct h (full dataset) with best values """ - - if apply_: - h[~np.isnan(hbs)] -= hbs[~np.isnan(hbs)] - - """ Save data """ - - if not TEST_MODE: - print("saving data ...") - - with h5py.File(ifile, "a") as fi: - - # Update h in the file and save correction (all cells at once) - if apply_: - fi[zvar][:] = h - - # Try to create varibales - try: - # Save params for each point - fi[H_BS] = hbs - fi["r2"] = r2fit - fi["d_std"] = dstd - fi["p_std"] = pstd - fi["d_trend"] = dtrend - fi["p_trend"] = ptrend - fi["r_bs"] = rbs - fi["r_lew"] = rlew - fi["r_tes"] = rtes - fi["s_bs"] = sbs - fi["s_lew"] = slew - fi["s_tes"] = stes - fi["b_bs"] = bbs - fi["b_lew"] = blew - fi["b_tes"] = btes - - # FIXME: Check if this is a good idea. - # Content of input file is being deleted! Removing for now! - # Update variables instead - except IOError: - """ - # Save params for each point - fi[H_BS][:] = hbs - fi['r2'][:] = r2fit - fi['d_std'][:] = dstd - fi['p_std'][:] = pstd - fi['d_trend'][:] = dtrend - fi['p_trend'][:] = ptrend - fi['r_bs'][:] = rbs - fi['r_lew'][:] = rlew - fi['r_tes'][:] = rtes - fi['s_bs'][:] = sbs - fi['s_lew'][:] = slew - fi['s_tes'][:] = stes - fi['b_bs'][:] = bbs - fi['b_lew'][:] = blew - fi['b_tes'][:] = btes - """ - print("SOME PROBLEM WITH THE FILE") - print("PARAMETERS NOT SAVED!") - print(ifile) - return - - # Only rename file if _SCAT has not been added - if ifile.find(SUFFIX1 + ".h5") < 0: - os.rename(ifile, ifile.replace(".h5", SUFFIX1 + ".h5")) - - # Save bs params as external file - with h5py.File(ifile.replace(".h5", SUFFIX2 + ".h5"), "w") as fo: - - # Try to svave variables - try: - # Save varibales - fo["lon"] = lonc - fo["lat"] = latc - fo["r2"] = r2fitc - fo["d_std"] = dstdc - fo["p_std"] = pstdc - fo["d_trend"] = dtrendc - fo["p_trend"] = ptrendc - fo["r_bs"] = rbsc - fo["r_lew"] = rlewc - fo["r_tes"] = rtesc - fo["s_bs"] = sbsc - fo["s_lew"] = slewc - fo["s_tes"] = stesc - fo["b_bs"] = bbsc - fo["b_lew"] = blewc - fo["b_tes"] = btesc - - # Catch any exceptions - except IOError: - # Exit program - print("COUND NOT SAVE PARAMETERS FOR EACH CELL (SCATGRD)") - return - - -if __name__ == "__main__": - - # Pass arguments - args = get_args() - ifiles = args.files[:] # input files - vnames = args.vnames[:] # lon/lat/h/time variable names - wnames = args.wnames[:] # variables to use for correction - dxy = args.dxy[0] * 1e3 # grid-cell length (km -> m) - radius = args.radius[0] * 1e3 # search radius (km -> m) - nreloc = args.nreloc[0] # number of relocations - proc = args.proc[0] # det, dif, bin or None series - proj = args.proj[0] # EPSG proj number - apply_ = args.apply # Apply cor in addition to saving - njobs = args.njobs[0] # parallel writing - tmin = args.tlim[0] # min time in decimal years - tmax = args.tlim[1] # max time in decimal years - bbox = args.bbox[:] - - print("parameters:") - for arg in vars(args).items(): - print(arg) - - if njobs == 1: - print("running sequential code ...") - [ - main( - ifile, vnames, wnames, dxy, proj, radius, nreloc, proc, apply_ - ) - for ifile in ifiles - ] - else: - print("running parallel code (%d jobs) ..." % njobs) - from joblib import Parallel, delayed - - Parallel(n_jobs=njobs, verbose=5)( - delayed(main)( - ifile, vnames, wnames, dxy, proj, radius, nreloc, proc, apply_ - ) - for ifile in ifiles - ) - - print("done!") diff --git a/captoolkit/joingrd.py.bak b/captoolkit/joingrd.py.bak deleted file mode 100755 index d9a9d4f..0000000 --- a/captoolkit/joingrd.py.bak +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env python -"""Join a set of geographical tiles (subgrids in individual files). - -Reads and writes in chunks, so it doesn't load all the data into memory. - -Uses bbox and proj information from the file name for removing the -overlap between tiles if this information is present, otherwise just -"concatenates" all the subgrids. - -Notes: - - The HDF5 files must all have the same 2d shape with 1d x/y coords. - - For joining data points into a single large file see 'join.py'. - - If several mosaics (i.e. 1 grid per time step), joins each in parallel. - - Groups input files with same 'time_key' (see below) for multiple mosaics. - - Bedmap boundaries: -b -3333000 3333000 -3333000 3333000 - Ross boundaries: -b -600000 410000 -1400000 -400000 - -Examples: - python joingrd.py ~/data/cryosat2/floating/latest/*_D_* - -b -600000 410000 -1400000 -400000 -k tile -o joined_D.h5 - -""" -import os -import re -import sys -import h5py -import pyproj -import argparse -import itertools -import tables as tb -import numpy as np - -time_key = 'time' - - -def get_args(): - """ Get command-line arguments. """ - parser = argparse.ArgumentParser( - description='Join tiles (individual files).') - parser.add_argument( - 'files', metavar='files', type=str, nargs='+', - help='files to join into a single HDF5 file') - parser.add_argument( - '-o', metavar=('outfile'), dest='ofile', type=str, nargs=1, - help=('output file for joined tiles'), - default=['joined_tiles.h5'],) - parser.add_argument( - '-v', metavar=('lon','lat'), dest='vnames', type=str, nargs=2, - help=('name of lon/lat variables in the files'), - default=['x', 'y'],) - parser.add_argument( - '-b', metavar=('w','e','s','n'), dest='bbox', type=float, nargs=4, - help=('bounding box for geographic region (deg or m)'), - default=[], required=True) - parser.add_argument( - '-u', dest='flipy', action='store_true', - help=('flip final grids upside-down (y-axis)'), - default=False) - parser.add_argument( - '-z', metavar=None, dest='comp', type=str, nargs=1, - help=('compress joined file(s)'), - choices=('lzf', 'gzip'), default=[None],) - parser.add_argument( - '-k', metavar=('keyword'), dest='key', type=str, nargs=1, - help=('keyword in file name for sorting by tile number ' - '(e.g. "tile"_NNN.h5)'), - default=[None],) - parser.add_argument( - '-n', metavar=('njobs'), dest='njobs', type=int, nargs=1, - help='for parallel processing of multiple grids, optional', - default=[1],) - return parser.parse_args() - - -def print_args(args): - print('Input arguments:') - for arg in vars(args).items(): - print(arg) - - -def get_tile_bbox(fname, key='bbox'): - """ Extract bbox info from file name. """ - fname = fname.split('_') # fname -> list - i = fname.index(key) - return list(map(float, fname[i+1:i+5])) # m - - -def get_tile_proj(fname, key='epsg'): - """ Extract EPSG number from file name. """ - fname = fname.split('_') # fname -> list - i = fname.index(key) - return fname[i+1] - - -def get_key_num(fname, key='bin', sep='_'): - """ Given 'key' extract 'key_num' from string. """ - l = os.path.splitext(fname)[0].split(sep) # fname -> list - i = l.index(key) - return key + sep + l[i+1] - - -def get_tile_lenght(tile_bbox): - xmin, xmax, ymin, ymax = tile_bbox - return np.abs(xmax-xmin), np.abs(ymax-ymin) - - -def get_num_tiles(grid_bbox, tile_dx, tile_dy): - """ How many tiles per row and col -> (ny,nx). """ - xmin, xmax, ymin, ymax = grid_bbox - return (int(np.abs(ymax-ymin)/tile_dy), int(np.abs(xmax-xmin)/tile_dx)) - - -def get_tile_shape(fname, vname): - # vname is name of a 2d grid. - with h5py.File(fname, 'r') as f: return f[vname].shape - - -def get_grid_shape(tile_shape, num_tiles): - return tuple(np.array(tile_shape) * np.array(num_tiles)) - - -def get_grid_names(fname): - """ Return all 2d '/variable' names in the HDF5. """ - with h5py.File(fname, 'r') as f: - vnames = [k for k in list(f.keys()) if f[k].ndim == 2] - return vnames - - -def create_output_grids(ofile, grid_shape, vnames): - with h5py.File(ofile, 'w') as f: - [f.create_dataset(v, grid_shape, 'f', fillvalue=np.nan) for v in vnames] - - -def save_output_coord(ofile, xy, vnames): - with h5py.File(ofile, 'a') as f: - f[vnames[0]] = xy[0] - f[vnames[1]] = xy[1] - - -def get_grid_coord(bbox, grid_shape): - xmin, xmax, ymin, ymax = bbox - ny, nx = grid_shape - return np.linspace(xmin, xmax, nx), np.linspace(ymin, ymax, ny) - - -def map_tile_to_grid(x_grid, y_grid, tile_bbox): - xmin, xmax, ymin, ymax = tile_bbox - i, = np.where( (y_grid >= ymin) & (y_grid <= ymax) ) - j, = np.where( (x_grid >= xmin) & (x_grid <= xmax) ) - return (i, j) - - -def get_tile_position(x_grid, y_grid, tile_bbox): - i, j = map_tile_to_grid(x_grid, y_grid, tile_bbox) - return (i[0], i[-1]+1, j[0], j[-1]+1) - - -def group_by_key(files, key='bin'): - """Group files by key_number.""" - files_sorted = sorted(files, key=lambda f: get_key_num(f, key=key)) - return [list(v) for k,v in itertools \ - .groupby(files_sorted, key=lambda f: get_key_num(f, key=key))] - - -def flipud(fname, vnames): - with h5py.File(fname, 'a') as f: - for v in vnames: f[v][:] = np.flipud(f[v][:]) - print('final grids flipped upside-down') - - -def join(ifiles, suffix=''): - """Join a set of subgrids into a mosaic.""" - assert len(ifiles) > 1 - - # Sort input files on keyword number if provided - if key: - print('sorting input files ...') - natkey = lambda s: int(re.findall(key+'_\d+', s)[0].split('_')[-1]) - ifiles.sort(key=natkey) - - if suffix: suffix = '_' + get_key_num(ifiles[0], key=suffix) - ofile_ = ofile + suffix - - # Generate empty output grids w/coords on disk - tile_bbox = get_tile_bbox(ifiles[0]) - dx, dy = get_tile_lenght(tile_bbox) - num_tiles = get_num_tiles(grid_bbox, dx, dy) - vnames = get_grid_names(ifiles[0]) - tile_shape = get_tile_shape(ifiles[0], vnames[0]) - grid_shape = get_grid_shape(tile_shape, num_tiles) - x_grid, y_grid = get_grid_coord(grid_bbox, grid_shape) - create_output_grids(ofile_, grid_shape, vnames) - save_output_coord(ofile_, (x_grid, y_grid), (xvar, yvar)) - - # Iterate over tiles - for ifile in ifiles: - print('tile:', ifile) - - tile_bbox = get_tile_bbox(ifile) - i1,i2,j1,j2 = get_tile_position(x_grid, y_grid, tile_bbox) - - with h5py.File(ifile, 'r') as fi, h5py.File(ofile_, 'a') as fo: - for v in vnames: fo[v][i1:i2,j1:j2] = fi[v][:] - - if flipy: flipud(ofile_, vnames) - - print('joined tiles:', len(ifiles)) - print('out ->', ofile_) - - - -# Pass arguments -args = get_args() -ifiles = args.files # input files -ofile = args.ofile[0] # output file -grid_bbox = args.bbox[:] # bounding box EPSG (m) or geographical (deg) -xvar = args.vnames[0] # lon variable names -yvar = args.vnames[1] # lat variable names -flipy = args.flipy # flip final grid upside-down -key = args.key[0] # keyword for sorting -comp = args.comp[0] -njobs = args.njobs[0] - -print_args(args) - -try: - allfiles = group_by_key(ifiles, key=time_key) -except: - allfiles = [ifiles] - -multiple_grids = len(allfiles) > 1 - -if njobs == 1 or not multiple_grids: - print('Running sequential code ...') - [join(ifiles) for ifiles in allfiles] - -else: - print('Running parallel code (%d jobs) ...' % njobs) - from joblib import Parallel, delayed - Parallel(n_jobs=njobs, verbose=5)( - delayed(join)(ifiles, time_key) for ifiles in allfiles) diff --git a/captoolkit/merge.py.bak b/captoolkit/merge.py.bak deleted file mode 100755 index 06d47b6..0000000 --- a/captoolkit/merge.py.bak +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python -""" -Merges several HDF5 files into a single file or multiple larger files. - -Example - merge.py ifiles_*.h5 -o ofile.h5 - merge.py ifiles_*.h5 -o ofile.h5 -m 5 -n 5 - -Notes - - The parallel option (-n) only works for multiple outputs (-m)! - - If no 'key' is given, it merges files in the order they are passed/read. - - If receive "Argument list too long", pass a string. - - See complementary program: split.py - -Use case: - (ERS-2 over Ross Ice Shelf, ice mode asc) - python merge.py '/mnt/devon-r0/shared_data/ers/floating_/latest/*_ICE_*_A_*.h5' -o /mnt/devon-r0/shared_data/ers/floating_/latest/AntIS_ERS2_ICE_READ_A_ROSS_RM_IBE.h5 -m 8 -n 8 - -""" -import os -import sys -import h5py -import argparse -import numpy as np -from glob import glob - - -def get_args(): - """ Pass command-line arguments. """ - parser = argparse.ArgumentParser( - description='Merges several HDF5 files.') - parser.add_argument( - 'file', metavar='file', type=str, nargs='+', - help='HDF5 files to merge') - parser.add_argument( - '-o', metavar='ofile', dest='ofile', type=str, nargs=1, - help=('output file name'), - default=[None], required=True,) - parser.add_argument( - '-m', metavar='nfiles', dest='nfiles', type=int, nargs=1, - help=('number of merged files (blocks)'), - default=[1],) - parser.add_argument( - '-v', metavar='var', dest='vnames', type=str, nargs='+', - help=('only merge specific vars if given, otherwise merge all'), - default=[],) - parser.add_argument( - '-z', metavar=None, dest='comp', type=str, nargs=1, - help=('compress merged file(s)'), - choices=('lzf', 'gzip'), default=[None],) - parser.add_argument( - '-k', metavar='key', dest='key', type=str, nargs=1, - help=('sort files by numbers after `key` in file name'), - default=[None],) - parser.add_argument( - '-n', metavar='njobs', dest='njobs', type=int, nargs=1, - help=('number of jobs for parallel processing when using -m'), - default=[1],) - return parser.parse_args() - - -def get_total_len(ifiles): - """ Get total output length from all input files. """ - N = 0 - for fn in ifiles: - with h5py.File(fn) as f: - N += list(f.values())[0].shape[0] - return N - - -def get_var_names(ifile): - """ Return all '/variable' names in the HDF5. """ - with h5py.File(ifile, 'r') as f: - vnames = list(f.keys()) - return vnames - - -def get_multi_io(ifiles, ofile, nfiles): - """ Construct multiple input/output file names. """ - # List of groups of input files - ifiles = [list(arr) for arr in np.array_split(ifiles, nfiles)] - # List of output file names - fname = os.path.splitext(ofile)[0] + '_%02d.h5' - ofiles = [(fname % k) for k in range(len(ifiles))] - return ifiles, ofiles - - -def merge(ifiles, ofile, vnames, comp): - """ - Merge variables from several input files into a single file. - - Args: - ifiles (list): input file names. - ofile (str): output file name. - vnames (list): name of vars to merge. - """ - # Get length of output containers (from all input files) - print('Calculating lenght of output from all input files ...') - N = get_total_len(ifiles) - - with h5py.File(ofile, 'w') as f: - - # Create empty output containers (w/compression optimized for speed) - [f.create_dataset(key, (N,), dtype='float64', compression=comp) - for key in vnames] - - # Iterate over the input files - k1 = 0 - for ifile in ifiles: - print('reading', ifile) - - # Write next chunk (the input file) - with h5py.File(ifile) as f2: - k2 = k1 + list(f2.values())[0].shape[0] # first var/first dim - - # Iterate over all variables - for key in vnames: - f[key][k1:k2] = f2[key][:] - - k1 = k2 - - print('merged', len(ifiles), 'files') - print('output ->', ofile) - - -# Sort input files by key -def sort_files(ifiles, key=None): - """ Sort files by numbers *after* the key in the file name. """ - if key: - import re - print('sorting input files ...') - natkey = lambda s: int(re.findall(key+'_\d+', s)[0].split('_')[-1]) - ifiles.sort(key=natkey) - - -if __name__ == '__main__': - - args = get_args() - ifile = args.file[:] # list - ofile = args.ofile[0] # str - nfiles = args.nfiles[0] - vnames = args.vnames - comp = args.comp[0] - key = args.key[0] - njobs = args.njobs[0] - - # In case a string is passed to avoid "Argument list too long" - if len(ifile) == 1: - ifile = glob(ifile[0]) - - # Sort files before merging - sort_files(ifile, key=key) - - # Get var names from first file, if not provided - vnames = get_var_names(ifile[0]) if not vnames else vnames - - # Groups of input files -> multiple output files - if nfiles > 1: - ifile, ofile = get_multi_io(ifile, ofile, nfiles) - else: - ifile, ofile = [ifile], [ofile] - - if njobs > 1 and nfiles > 1: - print('Running parallel code (%d jobs) ...' % njobs) - from joblib import Parallel, delayed - Parallel(n_jobs=njobs, verbose=5)( - delayed(merge)(fi, fo, vnames, comp) \ - for fi,fo in zip(ifile, ofile)) - else: - print('Running sequential code ...') - [merge(fi, fo, vnames, comp) for fi,fo in zip(ifile, ofile)] - diff --git a/captoolkit/tile.py.bak b/captoolkit/tile.py.bak deleted file mode 100755 index 392b0bb..0000000 --- a/captoolkit/tile.py.bak +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python -""" -Tile geographical data for (embarrassingly) parallelization. - -Takes as input tile size, overlap (km), and projection used to make -the tiles (EPSG-format). - -The code can deal with very-large files by reading/searching/saving -in chunks from/to disk. - -Large files may slowdown the tiling process, but it's "guarantee" to -complete the tilling. - -Notes: - Bedmap boundaries: -b -3333000 3333000 -3333000 3333000 - Ross boundaries: -b -600000 410000 -1400000 -400000 - ITS_LIVE boundaries: -b -2678407.5 2816632.5 -2154232.5 2259847.5 - -Credits: - captoolkit - JPL Cryosphere Altimetry Processing Toolkit - - Fernando Paolo (paolofer@jpl.nasa.gov) - Johan Nilsson (johan.nilsson@jpl.nasa.gov) - Alex Gardner (alex.s.gardner@jpl.nasa.gov) - - Jet Propulsion Laboratory, California Institute of Technology - -""" -import os -import sys -import h5py -import pyproj -import argparse -import tables as tb -import pandas as pd -import numpy as np -from glob import glob - - -# Optimal chunk size -chunks = 100000 - - -def get_args(): - """ Get command-line arguments. """ - parser = argparse.ArgumentParser( - description='Split geographical data into (overlapping) tiles') - parser.add_argument( - 'file', metavar='file', type=str, nargs='+', - help='single or multiple file(s) to split in tiles (HDF5)') - parser.add_argument( - '-b', metavar=('w','e','s','n'), dest='bbox', type=float, nargs=4, - help=('bounding box for geographic region (deg or m)'), - default=[], required=True) - parser.add_argument( - '-d', metavar=('length'), dest='dxy', type=float, nargs=1, - help=('block size of tile (km)'), - default=[], required=True) - parser.add_argument( - '-r', metavar=('buffer'), dest='dr', type=float, nargs=1, - help=("overlap between tiles (km)"), - default=[0],) - parser.add_argument( - '-v', metavar=('lon','lat'), dest='vnames', type=str, nargs=2, - help=('name of x/y variables'), - default=[None, None],) - parser.add_argument( - '-j', metavar=('epsg_num'), dest='proj', type=str, nargs=1, - help=('EPSG proj number (AnIS=3031, GrIS=3413)'), - default=['3031'],) - parser.add_argument( - '-n', metavar=('njobs'), dest='njobs', type=int, nargs=1, - help="for parallel writing of multiple tiles, optional", - default=[1],) - return parser.parse_args() - - -def print_args(args): - print('Input arguments:') - for arg in vars(args).items(): - print(arg) - - -def transform_coord(proj1, proj2, x, y): - """Transform coordinates from proj1 to proj2 (EPSG num).""" - proj1 = pyproj.Proj("+init=EPSG:"+proj1) - proj2 = pyproj.Proj("+init=EPSG:"+proj2) - return pyproj.transform(proj1, proj2, x, y) - - -def get_xy(ifile, vnames=['lon', 'lat'], proj='3031'): - """ Get lon/lat from input file and convert to x/y. """ - xvar, yvar = vnames - with tb.open_file(ifile) as fi: - x = fi.get_node('/', xvar)[:] - y = fi.get_node('/', yvar)[:] - return transform_coord('4326', proj, x, y) - - -def add_suffix(fname, suffix=''): - path, ext = os.path.splitext(fname) - return path + suffix + ext - - -def get_tile_bboxs(grid_bbox, dxy): - """ Define bbox of tiles given bbox of grid and tile size. """ - - xmin, xmax, ymin, ymax = grid_bbox - - # Number of tile edges on each dimension - Nns = int(np.abs(ymax - ymin) / dxy) + 1 - New = int(np.abs(xmax - xmin) / dxy) + 1 - - # Coord of tile edges for each dimension - xg = np.linspace(xmin, xmax, New) - yg = np.linspace(ymin, ymax, Nns) - - # Vector of bbox for each tile ##NOTE: Nested loop! - bboxs = [(w,e,s,n) for w,e in zip(xg[:-1], xg[1:]) - for s,n in zip(yg[:-1], yg[1:])] - del xg, yg - - return bboxs - - -def get_tile_data(ifile, x, y, bbox, buff=1, proj='3031', tile_num=0): - """ Extract data within bbox and save to individual file. """ - - xmin, xmax, ymin, ymax = bbox - - # Open input file (out-of-core) - fi = tb.open_file(ifile) - - # Get all 1d variables into a list (out-of-core) - points = [fi.get_node('/', v.name) for v in fi.list_nodes('/')] - - npts = 0 - nrow = x.shape[0] - first_iter = True - - # Read and write in chunks - for i in range(0, nrow, chunks): - - k = min(i+chunks, nrow) - - x_chunk = x[i:k] - y_chunk = y[i:k] - - # Get the tile indices - idx, = np.where( (x_chunk >= xmin-buff) & (x_chunk <= xmax+buff) & - (y_chunk >= ymin-buff) & (y_chunk <= ymax+buff) ) - - # Leave chunk if outside tile - if len(idx) == 0: continue - - # Get chunk of data in-memory, and - # Query chunk in-memory - points_chunk = [d[i:k] for d in points] # -> list of 1d chunks - points_chunk = [d[idx] for d in points_chunk] - - # Create output file on first iteration - if first_iter: - - # Define file name - suffix = ('_bbox_%d_%d_%d_%d_buff_%g_epsg_%s_tile_%03d' % \ - (xmin, xmax, ymin, ymax, buff/1e3, proj, tile_num)) - - ofile = add_suffix(ifile, suffix) - - fo = tb.open_file(ofile, 'w') - - # Initialize containers (lenght=0) - out = [fo.create_earray('/', v.name, tb.Float64Atom(), shape=(0,)) - for v in points] - - first_iter = False - - # Save chunk - [v.append(d) for v, d in zip(out, points_chunk)] - npts += points_chunk[0].shape[0] - - fo.flush() - - if npts != 0: print('tile %03d: #points' % tile_num, npts, '...') - - try: - fo.close() - except: - pass - - fi.close() - - -def count_files(ifiles, key='*tile*'): - saved = [] - for f in ifiles: - path, ext = os.path.splitext(f) - saved.extend(glob(path + key)) - return len(saved) - - -# Pass arguments -args = get_args() -ifiles = args.file[:] # input file(s) -vnames = args.vnames[:] # lon/lat variable names -bbox_ = args.bbox # bounding box EPSG (m) or geographical (deg) -dr = args.dr[0] * 1e3 # buffer (km -> m) -dxy = args.dxy[0] * 1e3 # tile length (km -> m) -proj = args.proj[0] # EPSG proj number -njobs = args.njobs[0] # parallel writing - -print_args(args) - -if len(ifiles) == 1: ifiles = glob(ifiles[0]) # pass str if "Argument list too long" - -# Generate list of items (only once!) for parallel proc -print('generating list of tasks (files x tiles) ...') - -# NOTE: To avoid reprojecting coords (lon/lat -> x/y) several -# times for each file (one conversion per tile), coordinates are -# loaded into mem, converted once per file and passed to each worker. - -bboxs = get_tile_bboxs(bbox_, dxy) # [b1, b2..] -xys = [get_xy(f, vnames, proj) for f in ifiles] # [(x1,y1), (x2,y2)..] -fxys = [(f,x,y) for f,(x,y) in zip(ifiles, xys)] # [(f1,x1,y1), (f2,x2,y2)..] -fxybs = [(f,x,y,b,n+1) for (f,x,y) in fxys for n,b in enumerate(bboxs)] # [(f1,x1,y1,b1,1), (f1,x1,y1,b2,2)..] - -print('number of files:', len(ifiles)) -print('number of tiles:', len(bboxs)) -print('number of tasks:', len(fxybs)) - -# NOTE: For each bbox scan full file - -if njobs == 1: - print('running sequential code ...') - [get_tile_data(f, x, y, b, dr, proj, n) for f,x,y,b,n in fxybs] - -else: - print('running parallel code (%d jobs) ...' % njobs) - from joblib import Parallel, delayed - Parallel(n_jobs=njobs, verbose=5)( - delayed(get_tile_data)(f, x, y, b, dr, proj, n) for f,x,y,b,n in fxybs) - -print('number of tiles with data:', count_files(ifiles)) diff --git a/captoolkit/txt2hdf.py b/captoolkit/txt2hdf.py index d61db84..40b94e0 100755 --- a/captoolkit/txt2hdf.py +++ b/captoolkit/txt2hdf.py @@ -38,32 +38,57 @@ # Pass command-line arguments parser = argparse.ArgumentParser( - description='Convert ASCII tables to HDF5.\n' - 'If no variable names are provided, save table as 2d array.') + description="Convert ASCII tables to HDF5.\n" + "If no variable names are provided, save table as 2d array." +) parser.add_argument( - 'files', metavar=('files'), type=str, nargs='+', - help='ASCII file(s) to convert (it can be a directory) PASS FILES FIRST!') + "files", + metavar=("files"), + type=str, + nargs="+", + help="ASCII file(s) to convert (it can be a directory) PASS FILES FIRST!", +) parser.add_argument( - '-v', metavar=('vars'), dest='vnames', type=str, nargs='+', - help=('name of variables in ASCII file (-v lon lat time...)'), - default=[None],) + "-v", + metavar=("vars"), + dest="vnames", + type=str, + nargs="+", + help=("name of variables in ASCII file (-v lon lat time...)"), + default=[None], +) parser.add_argument( - '-e', metavar=('.txt'), dest='ext', type=str, nargs=1, - help=('extension of ASCII files'), - default=['.txt'],) + "-e", + metavar=(".txt"), + dest="ext", + type=str, + nargs=1, + help=("extension of ASCII files"), + default=[".txt"], +) parser.add_argument( - '-n', metavar=('njobs'), dest='njobs', type=int, nargs=1, - help=('number of jobs for parallel processing (-n 1)'), - default=[NJOBS],) + "-n", + metavar=("njobs"), + dest="njobs", + type=int, + nargs=1, + help=("number of jobs for parallel processing (-n 1)"), + default=[NJOBS], +) parser.add_argument( - '-c', metavar=('chunk'), dest='chunk', type=int, nargs=1, - help=('chunksize (# of lines) for I/O (-c 100000)'), - default=[CHUNKSIZE],) + "-c", + metavar=("chunk"), + dest="chunk", + type=int, + nargs=1, + help=("chunksize (# of lines) for I/O (-c 100000)"), + default=[CHUNKSIZE], +) # Global variables args = parser.parse_args() @@ -75,27 +100,32 @@ sep = None # If None, tries to determine #TODO: Try a more clever way! -def list_files(path, endswith='.txt'): +def list_files(path, endswith=".txt"): """List files in dir recursively.""" - return [os.path.join(dpath, f) - for dpath, dnames, fnames in os.walk(path) - for f in fnames if f.endswith(endswith)] + return [ + os.path.join(dpath, f) + for dpath, dnames, fnames in os.walk(path) + for f in fnames + if f.endswith(endswith) + ] def init_vec(f, names, data): """Initialize resizable 1d arrays to hold the outputs.""" - + # Iterate over 'names' and columns of 'data' - dset = [(name, f.create_dataset(name, data=d, maxshape=(None,))) \ - for name, d in zip(names, data.T)] # -> list + dset = [ + (name, f.create_dataset(name, data=d, maxshape=(None,))) + for name, d in zip(names, data.T) + ] # -> list return OrderedDict(dset) def init_mat(f, name, data): """Initialize resizable 2d array to hold the output.""" - + # Iterate over 'names' and columns of 'data' - mat = f.create_dataset(name, data=data, maxshape=(None,data.shape[1])) + mat = f.create_dataset(name, data=data, maxshape=(None, data.shape[1])) return {name: mat} @@ -104,13 +134,13 @@ def save_as_vec(dset, nrow, data): # Iterate over columns for name, d in zip(list(dset.keys()), data.T): - + # Resize the datasets to accommodate next chunk of rows dset[name].resize(nrow + data.shape[0], axis=0) - + # Write next chunk dset[name][nrow:] = d - + return dset @@ -118,16 +148,16 @@ def save_as_mat(dset, nrow, data): """Save 'data' as a chunk in a 2d array.""" # Resize the datasets to accommodate next chunk of rows - dset['data'].resize(nrow + data.shape[0], axis=0) - + dset["data"].resize(nrow + data.shape[0], axis=0) + # Write next chunk - dset['data'][nrow:] = data - + dset["data"][nrow:] = data + return dset # Create variable name if not given -names = 'data' if vnames[0] is None else vnames +names = "data" if vnames[0] is None else vnames # Generate file list if directory given if len(files) == 1 and os.path.isdir(files[0]): @@ -143,47 +173,49 @@ def save_as_mat(dset, nrow, data): def main(infile): - print('converting ASCII table to HDF5 ...') + print("converting ASCII table to HDF5 ...") - outfile = os.path.splitext(infile)[0] + '.h5' + outfile = os.path.splitext(infile)[0] + ".h5" - with h5py.File(outfile, 'w') as f: + with h5py.File(outfile, "w") as f: # Read the first chunk to get the column structure - reader = pd.read_table(infile, sep=sep, header=None, - chunksize=chunksize, engine='python') + reader = pd.read_table( + infile, sep=sep, header=None, chunksize=chunksize, engine="python" + ) chunk = reader.get_chunk(chunksize) nrows, ncols = chunk.shape - if names == 'data': + if names == "data": dset = init_mat(f, names, chunk.values) else: dset = init_vec(f, names, chunk.values) - print(('lines saved:', nrows, '...')) + print(("lines saved:", nrows, "...")) # Read chunks of ASCII file for chunk in reader: - if names == 'data': + if names == "data": dset = save_as_mat(dset, nrows, chunk.values) else: dset = save_as_vec(dset, nrows, chunk.values) nrows += chunk.shape[0] - print(('lines saved:', nrows, '...')) + print(("lines saved:", nrows, "...")) - print(('input <- ', infile)) - print(('output ->', outfile)) - print(('variable names in HDF5:', names)) + print(("input <- ", infile)) + print(("output ->", outfile)) + print(("variable names in HDF5:", names)) if njobs == 1: - print('Running sequential code...') + print("Running sequential code...") [main(f) for f in files] else: - print(('Running parallel code (%d jobs)...' % njobs)) + print(("Running parallel code (%d jobs)..." % njobs)) from joblib import Parallel, delayed + Parallel(n_jobs=njobs, verbose=5)(delayed(main)(f) for f in files) diff --git a/captoolkit/utils.py.bak b/captoolkit/utils.py.bak deleted file mode 100755 index 9724da3..0000000 --- a/captoolkit/utils.py.bak +++ /dev/null @@ -1,289 +0,0 @@ -""" -High-level functions used across the CAP-Toolkit package. - -""" -import h5py -import pyproj -import numpy as np -import pandas as pd -import xarray as xr -from scipy import signal - - -# --- Utilitiy functions --- # - - -def print_args(args): - """Print arguments passed to argparse.""" - print("Input arguments:") - for arg in vars(args).items(): - print(arg) - - -def read_h5(fname, vnames): - """Generic HDF5 reader. - - vnames : ['var1', 'var2', 'var3'] - """ - with h5py.File(fname, "r") as f: - variables = [f[v][()] for v in vnames] - - return variables if len(vnames) > 1 else variables[0] - - -def save_h5(fname, vardict, mode="a"): - """Generic HDF5 writer. - - vardict : {'name1': var1, 'name2': va2, 'name3': var3} - """ - with h5py.File(fname, mode) as f: - for k, v in list(vardict.items()): - if k in f: - f[k][:] = np.squeeze(v) - else: - f[k] = np.squeeze(v) - - -def is_empty(ifile): - """Test if file is corruted or empty""" - try: - with h5py.File(ifile, "r") as f: - if bool(list(f.keys())): - return False - else: - return True - except IOError: - return True - - -def find_nearest(arr, val): - """Find index of 'nearest' value(s). - - Args: - arr (nd array) : The array to search in (nd). No need to be sorted. - val (scalar or array) : Value(s) to find. - - Returns: - out (tuple or scalar) : The index (or tuple if nd array) of nearest - entry found. If `val` is a list of values then a tuple of ndarray - with the indices of each value is return. - - See also: - find_nearest2 - - """ - idx = [] - - if np.ndim(val) == 0: - val = np.array([val]) - - for v in val: - idx.append((np.abs(arr - v)).argmin()) - idx = np.unravel_index(idx, arr.shape) - - return idx if val.ndim > 1 else idx[0] - - -def mad_std(x, axis=None): - """Robust standard deviation (using MAD).""" - - return 1.4826 * np.nanmedian(np.abs(x - np.nanmedian(x, axis)), axis) - - -def transform_coord(proj1, proj2, x, y): - """Transform coordinates from proj1 to proj2 (EPSG num). - - Examples EPSG proj: - Geodetic (lon/lat): 4326 - Stereo AnIS (x/y): 3031 - Stereo GrIS (x/y): 3413 - """ - # Set full EPSG projection strings - proj1 = pyproj.Proj("+init=EPSG:" + str(proj1)) - proj2 = pyproj.Proj("+init=EPSG:" + str(proj2)) - # Convert coordinates - - return pyproj.transform(proj1, proj2, x, y) - - -# --- Processing functions --- # - - -def sgolay1d(h, window=3, order=1, deriv=0, dt=1.0, mode="nearest", time=None): - """Savitztky-Golay filter with support for NaNs. - - If time is given, interpolate NaNs otherwise pad w/zeros. - If time is given, calculate dt as t[1]-t[0]. - - Args: - dt (int): spacing between samples (for correct units). - - Notes: - Works with numpy, pandas and xarray objects. - - """ - if isinstance(h, (pd.Series, xr.DataArray)): - h = h.values - if isinstance(time, (pd.Series, xr.DataArray)): - time = time.values - - _h = h.copy() - (i_nan,) = np.where(np.isnan(_h)) - (i_valid,) = np.where(np.isfinite(_h)) - - if i_valid.size < 5: - return _h - elif time is not None: - _h[i_nan] = np.interp(time[i_nan], time[i_valid], _h[i_valid]) - dt = np.abs(time[1] - time[0]) - else: - _h[i_nan] = 0 - - return signal.savgol_filter(_h, window, order, deriv, delta=dt, mode=mode) - - -# TODO: Think if dx, dy should be applied here !!! -def sgolay2d(z, window_size, order, derivative=None): - """Two dimensional data smoothing and least-square gradient estimate. - - Code from: - http://scipy-cookbook.readthedocs.io/items/SavitzkyGolay.html - - Reference: - A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of - Data by Simplified Least Squares Procedures. Analytical - Chemistry, 1964, 36 (8), pp 1627-1639. - - """ - # number of terms in the polynomial expression - # TODO: Double check this (changed for Py3) - n_terms = (order + 1) * (order + 2) // 2 - - if window_size % 2 == 0: - raise ValueError("window_size must be odd") - - if window_size ** 2 < n_terms: - raise ValueError("order is too high for the window size") - - half_size = window_size // 2 - - # exponents of the polynomial. - # p(x,y) = a0 + a1*x + a2*y + a3*x^2 + a4*y^2 + a5*x*y + ... - # this line gives a list of two item tuple. Each tuple contains - # the exponents of the k-th term. First element of tuple is for x - # second element for y. - # Ex. exps = [(0,0), (1,0), (0,1), (2,0), (1,1), (0,2), ...] - exps = [(k - n, n) for k in range(order + 1) for n in range(k + 1)] - - # coordinates of points - ind = np.arange(-half_size, half_size + 1, dtype=np.float64) - dx = np.repeat(ind, window_size) - dy = np.tile(ind, [window_size, 1]).reshape(window_size ** 2,) - - # build matrix of system of equation - A = np.empty((window_size ** 2, len(exps))) - - for i, exp in enumerate(exps): - A[:, i] = (dx ** exp[0]) * (dy ** exp[1]) - - # pad input array with appropriate values at the four borders - new_shape = z.shape[0] + 2 * half_size, z.shape[1] + 2 * half_size - Z = np.zeros((new_shape)) - # top band - band = z[0, :] - Z[:half_size, half_size:-half_size] = band - np.abs( - np.flipud(z[1 : half_size + 1, :]) - band - ) - # bottom band - band = z[-1, :] - Z[-half_size:, half_size:-half_size] = band + np.abs( - np.flipud(z[-half_size - 1 : -1, :]) - band - ) - # left band - band = np.tile(z[:, 0].reshape(-1, 1), [1, half_size]) - Z[half_size:-half_size, :half_size] = band - np.abs( - np.fliplr(z[:, 1 : half_size + 1]) - band - ) - # right band - band = np.tile(z[:, -1].reshape(-1, 1), [1, half_size]) - Z[half_size:-half_size, -half_size:] = band + np.abs( - np.fliplr(z[:, -half_size - 1 : -1]) - band - ) - # central band - Z[half_size:-half_size, half_size:-half_size] = z - - # top left corner - band = z[0, 0] - Z[:half_size, :half_size] = band - np.abs( - np.flipud(np.fliplr(z[1 : half_size + 1, 1 : half_size + 1])) - band - ) - # bottom right corner - band = z[-1, -1] - Z[-half_size:, -half_size:] = band + np.abs( - np.flipud(np.fliplr(z[-half_size - 1 : -1, -half_size - 1 : -1])) - - band - ) - - # top right corner - band = Z[half_size, -half_size:] - Z[:half_size, -half_size:] = band - np.abs( - np.flipud(Z[half_size + 1 : 2 * half_size + 1, -half_size:]) - band - ) - # bottom left corner - band = Z[-half_size:, half_size].reshape(-1, 1) - Z[-half_size:, :half_size] = band - np.abs( - np.fliplr(Z[-half_size:, half_size + 1 : 2 * half_size + 1]) - band - ) - - # solve system and convolve - - if derivative is None: - m = np.linalg.pinv(A)[0].reshape((window_size, -1)) - - return signal.fftconvolve(Z, m, mode="valid") - elif derivative == "col": - c = np.linalg.pinv(A)[1].reshape((window_size, -1)) - - return signal.fftconvolve(Z, -c, mode="valid") - elif derivative == "row": - r = np.linalg.pinv(A)[2].reshape((window_size, -1)) - - return signal.fftconvolve(Z, -r, mode="valid") - elif derivative == "both": - c = np.linalg.pinv(A)[1].reshape((window_size, -1)) - r = np.linalg.pinv(A)[2].reshape((window_size, -1)) - - return ( - signal.fftconvolve(Z, -r, mode="valid"), - signal.fftconvolve(Z, -c, mode="valid"), - ) - - -def make_grid(xmin, xmax, ymin, ymax, dx, dy, return_2d=False): - """Construct output grid-coordinates.""" - Nn = int((np.abs(ymax - ymin)) / dy) + 1 - Ne = int((np.abs(xmax - xmin)) / dx) + 1 - xi = np.linspace(xmin, xmax, num=Ne) - yi = np.linspace(ymin, ymax, num=Nn) - - if return_2d: - return np.meshgrid(xi, yi) - else: - return xi, yi - -# --- Test functions --- # - - -# Some edge test cases (for the 3-km grid) -test_ij_3km = [ - (845, 365), # 0 PIG Floating 1 - (831, 364), # 1 PIG Floating 2 - (1022, 840), # 2 CS-2 only 1 - (970, 880), # 3 CS-2 only 2 - (100, 1170), # 4 fig1 large peaks at mission overlaps - (100, 766), # 5 fig2 peak at mission overlap - (7, 893), # 6 step change at beguining - (8, 892), # 7 with hole - (9, 889), # 8 with large hole - (11, 893), # 9 step in divergence -] diff --git a/captoolkit/xing.py.bak b/captoolkit/xing.py.bak deleted file mode 100755 index 8f04e34..0000000 --- a/captoolkit/xing.py.bak +++ /dev/null @@ -1,445 +0,0 @@ -#!/usr/bin/env python2 -import warnings -warnings.filterwarnings("ignore") -import sys -import glob -import pyproj -import pandas as pd -import numpy as np -import h5py -import argparse -import statsmodels.api as sm -import matplotlib.pyplot as plt -from scipy.spatial import cKDTree -from gdalconst import * -from osgeo import gdal, osr -from scipy.ndimage import map_coordinates -from scipy.stats import binned_statistic_2d -from scipy.spatial import cKDTree - -""" - - Program for computing statistics between two altimetry data sets - -""" - -def interp2d(xd, yd, data, xq, yq, **kwargs): - """ Interpolator from raster to point """ - - xd = np.flipud(xd) - yd = np.flipud(yd) - data = np.flipud(data) - - xd = xd[0, :] - yd = yd[:, 0] - - nx, ny = xd.size, yd.size - (x_step, y_step) = (xd[1] - xd[0]), (yd[1] - yd[0]) - - assert (ny, nx) == data.shape - assert (xd[-1] > xd[0]) and (yd[-1] > yd[0]) - - if np.size(xq) == 1 and np.size(yq) > 1: - xq = xq * ones(yq.size) - elif np.size(yq) == 1 and np.size(xq) > 1: - yq = yq * ones(xq.size) - - xp = (xq - xd[0]) * (nx - 1) / (xd[-1] - xd[0]) - yp = (yq - yd[0]) * (ny - 1) / (yd[-1] - yd[0]) - - coord = np.vstack([yp, xp]) - - zq = map_coordinates(data, coord, **kwargs) - - return zq - - -def mad_std(x, axis=None): - """ Robust standard deviation (using MAD). """ - return 1.4826 * np.nanmedian(np.abs(x - np.nanmedian(x, axis)), axis) - - -def sigma_filter(x, xmin=-9999, xmax=9999, tol=5, alpha=5): - """ Iterative outlier filter """ - - # Set default value - tau = 100.0 - - # Remove data outside selected range - x[x < xmin] = np.nan - x[x > xmax] = np.nan - - # Initiate counter - k = 0 - - # Outlier rejection loop - while tau > tol: - - # Compute initial rms - rmse_b = mad_std(x) - - # Compute residuals - dh_abs = np.abs(x - np.nanmedian(x)) - - # Index of outliers - io = dh_abs > alpha * rmse_b - - # Compute edited rms - rmse_a = mad_std(x[~io]) - - # Determine rms reduction - tau = 100.0 * (rmse_b - rmse_a) / rmse_a - - # Remove data if true - if tau > tol or k == 0: - # Set outliers to NaN - x[io] = np.nan - - # Update counter - k += 1 - - return x - - -def geotiffread(ifile): - """ Read Geotiff file """ - - file = gdal.Open(ifile, GA_ReadOnly) - - metaData = file.GetMetadata() - projection = file.GetProjection() - src = osr.SpatialReference() - src.ImportFromWkt(projection) - proj = src.ExportToWkt() - - Nx = file.RasterXSize - Ny = file.RasterYSize - - trans = file.GetGeoTransform() - - dx = trans[1] - dy = trans[5] - - Xp = np.arange(Nx) - Yp = np.arange(Ny) - - (Xp, Yp) = np.meshgrid(Xp, Yp) - - X = trans[0] + (Xp + 0.5) * trans[1] + (Yp + 0.5) * trans[2] - Y = trans[3] + (Xp + 0.5) * trans[4] + (Yp + 0.5) * trans[5] - - band = file.GetRasterBand(1) - - Z = band.ReadAsArray() - - dx = np.abs(dx) - dy = np.abs(dy) - - return X, Y, Z, dx, dy, proj - - -def transform_coord(proj1, proj2, x, y): - """Transform coordinates from proj1 to proj2 (EPSG num).""" - - # Set full EPSG projection strings - proj1 = pyproj.Proj("+init=EPSG:"+proj1) - proj2 = pyproj.Proj("+init=EPSG:"+proj2) - - # Convert coordinates - return pyproj.transform(proj1, proj2, x, y) - - -def wrapTo360(lon): - """ Wrap longitude to 360 deg """ - positiveInput = (lon > 0.0) - lon = np.mod(lon, 360.0) - lon[(lon == 0) & positiveInput] = 360.0 - return lon - - -# Wrap longitude to 180 deg -def wrapTo180(lon): - """Wrap longitude to 180 deg """ - q = (lon < -180.0) | (180.0 < lon) - lon[q] = wrapTo360(lon[q] + 180.0) - 180.0 - return lon - - -# Output description of solution -description = ('Program for computing statistics between two altimetry datasets.') - -# Define command-line arguments -parser = argparse.ArgumentParser(description=description) - -parser.add_argument( - '-r', metavar='fref', dest='fref', type=str, nargs='+', - help='reference file(s)', - required=True) - -parser.add_argument( - '-f', metavar='fcomp', dest='fcomp', type=str, nargs='+', - help='comparison files(s)', - required=True) - -parser.add_argument( - '-o', metavar='ofile', dest='ofile', type=str, nargs=1, - help='name of output statistics file',) - -parser.add_argument( - '-d', metavar='dxy', dest='dxy', type=float, nargs=1, - help=('spatial resolution of comparison grid (m)'), - default=[50],) - -parser.add_argument( - '-p', metavar=('epsg_num'), dest='proj', type=str, nargs=1, - help=('EPSG proj number (AnIS=3031, GrIS=3413)'), - default=['3031'],) - -parser.add_argument( - '-v', metavar=('x','y','t','h'), dest='vnames_ref', type=str, nargs=4, - help=('name of varibales in reference file'), - default=['lon','lat','t_year','h_cor'],) - -parser.add_argument( - '-u', metavar=('x','y','t','h'), dest='vnames_com', type=str, nargs=4, - help=('name of varibales in comparison file'), - default=['lon','lat','t_year','h_cor'],) - -parser.add_argument( - '-s', metavar=('s_min','s_max'), dest='slope', type=float, nargs=2, - help=('min and max slope interval (deg)'), - default=[0.0,1.0],) - -parser.add_argument( - '-t', metavar='dt', dest='tspan', type=float, nargs=1, - help=('only compare data with time-span < dt'), - default=[3./12],) - -parser.add_argument( - '-n', metavar=('slope_file'), dest='fslope', type=str, nargs=1, - help='name of slope raster file', - default=[None],) - -parser.add_argument( - '-i', metavar=('n_ref','n_com'), dest='ncomp', type=int, nargs=2, - help=('sub-sample data using every n:th point'), - default=[1,1],) - -# Create parser argument container -args = parser.parse_args() - -# Pass arguments -fref = args.fref -fcom = args.fcomp -ofile = args.ofile[0] -dxy = args.dxy[0] -proj = args.proj[0] -vref = args.vnames_ref[:] -cref = args.vnames_com[:] -s_min = args.slope[0] -s_max = args.slope[1] -tspan = args.tspan[0] -fslp = args.fslope[0] -nref = args.ncomp[0] -ncom = args.ncomp[1] - -# Initiate statistics -Fref = [] -Fcom = [] -mean = [] -stdv = [] -rmse = [] -vmin = [] -vmax = [] -nobs = [] - -# Check for slope file -if fslp is not None: - - # Read slope file - (X, Y, Z) = geotiffread(fslp)[0:3] - -# Loop trough reference list -for f_ref in fref: - - # Load file - with h5py.File(f_ref, 'r') as fr: - - # Load ref. variables - xr = fr[vref[0]][::nref] - yr = fr[vref[1]][::nref] - tr = fr[vref[2]][::nref] - zr = fr[vref[3]][::nref] - - # Copy locations - lon_r, lat_r = xr[:], yr[:] - - # Transform to wanted coordinate system - (xr, yr) = transform_coord('4326', proj, xr, yr) - - # Compute bounding box - xmin, xmax, ymin, ymax = np.min(xr), np.max(xr), np.min(yr), np.max(yr) - - # Loop through comparison list - for f_com in fcom: - - # Load file - with h5py.File(f_com, 'r') as fr: - - # Load com. variables - xc = fr[cref[0]][::ncom] - yc = fr[cref[1]][::ncom] - tc = fr[cref[2]][::ncom] - zc = fr[cref[3]][::ncom] - - # Check mean time difference - # if np.abs(tr.mean() - tc.mean()) > 3 * tspan: continue - - # Transform to wanted coordinate system - (xc, yc) = transform_coord('4326', proj, xc, yc) - - # Index of data - idx = (xc > xmin) & (xc < xmax) & (yc > ymin) & (yc < ymax) - - # Cut to same area as reference - xc, yc, zc, tc = xc[idx], yc[idx], zc[idx], tc[idx] - - # Construct KD-Tree - tree = cKDTree(list(zip(xc, yc))) - - # Output vector - dz = np.ones(len(zr)) * np.nan - xo = np.ones(len(zr)) * np.nan - yo = np.ones(len(zr)) * np.nan - z1 = np.ones(len(zr)) * np.nan - z2 = np.ones(len(zr)) * np.nan - t1 = np.ones(len(zr)) * np.nan - t2 = np.ones(len(zr)) * np.nan - - # Loop trough reference points - for kx in range(len(xr)): - - # Query KD-Tree - dr, ky = tree.query((xr[kx], yr[kx]), k=1) - - # Check if we should compute - if dr > dxy: continue - - if np.abs(tr[kx]-tc[ky]) > tspan: continue - - # Compute difference - dz[kx] = zr[kx] - zc[ky] - - # Save location where we have difference - z1[kx] = zr[kx] - z2[kx] = zc[ky] - xo[kx] = lon_r[kx] - yo[kx] = lat_r[kx] - t1[kx] = tr[kx] - t2[kx] = tc[ky] - - # If no data skip - if np.all(np.isnan(dz)): - continue - - # Light filtering of outliers - dz = sigma_filter(dz) - - # Check if we are binning by slope - if fslp: - - # Interpolate slope to data - slp = interp2d(X, Y, Z, xr, yr, order=1) - - # Cull using surface slope - dz[(slp < s_min) & (slp > s_max)] = np.nan - - else: - - # No slope provided - slp = np.ones(len(zr)) * 9999 - - # Find NaN-values - inan = ~np.isnan(dz) - - # Save to csv file - data = {'lat' : np.around(yo[inan],4), - 'lon' : np.around(xo[inan],4), - 't_ref' : np.around(t1[inan],3), - 't_com' : np.around(t2[inan],3), - 'v_ref' : np.around(z1[inan],3), - 'v_com' : np.around(z2[inan],3), - 'v_diff': np.around(dz[inan],3), - 'dt' : np.around(t1[inan]-t2[inan],3), - 'slope' : np.around(slp[inan],3),} - - # Get name only and not path to files - f_ref_i = f_ref[f_ref.rfind('/') + 1:] - f_com_i = f_com[f_com.rfind('/') + 1:] - - # Create data frame - df = pd.DataFrame(data, columns=['lat', 'lon', 't_ref', 't_com', 'v_ref', 'v_com', 'v_diff', 'dt', 'slope']) - - # Save to csv - df.to_csv(f_ref_i+'_'+f_com_i+'.csv', sep=',', index=False) - - # Compute statistics - avg = np.around(np.nanmean(dz),3) - std = np.around(np.nanstd(dz),3) - rms = np.around(np.sqrt(avg**2 + std**2),3) - min = np.around(np.nanmin(dz),3) - max = np.around(np.nanmax(dz),3) - nob = len(dz[~np.isnan(dz)]) - - # Save all the stats - Fref.append(f_ref_i) - Fcom.append(f_com_i) - mean.append(avg) - stdv.append(std) - rmse.append(rms) - vmin.append(min) - vmax.append(max) - nobs.append(nob) - - # Print statistics to screen - print('Ref:' ,f_ref_i, 'Comp:', f_com_i, 'Mean:', avg, 'Std:', std, 'RMS:', rms, 'Nobs:', nob) - - # Plot data if wanted - if 0: - plt.figure() - plt.hist(dz[~np.isnan(dz)], 50) - plt.show() - -# Compute weighted averages -m = np.asarray(mean) -n = np.asarray(nobs) -s = np.asarray(stdv) - -# Compute weights -w = n / (s * s * np.sum(n)) - -# Weighted average and standard deviation -aw = np.sum(w * m)/np.sum(w) -sw = np.sqrt(1 / np.sum(w)) - -# Print weighted statistics -print('#############################################################') -print('| Weighted Statistics |', 'Wmean:', np.around(aw, 2), 'Wstd:', np.around(sw, 2), \ - 'WRMSE:', np.around(np.sqrt(aw**2 + sw**2), 2), '|') -print('#############################################################') - -# Create data container -raw_data = {'Reference' : Fref, - 'Comparison' : Fcom, - 'Mean' : mean, - 'Std.dev' : stdv, - 'RMSE' : rmse, - 'Min' : vmin, - 'Max' : vmax, - 'Nobs' : nobs,} - -# Create data frame -df = pd.DataFrame(raw_data, columns = ['Reference','Comparison','Mean','Std.dev','RMSE','Min','Max','Nobs']) - -# Save to csv -df.to_csv(ofile, sep=',', index=False) diff --git a/captoolkit/xover.py.bak b/captoolkit/xover.py.bak deleted file mode 100755 index 1507295..0000000 --- a/captoolkit/xover.py.bak +++ /dev/null @@ -1,914 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import glob -import numpy as np -import pyproj -import h5py -import argparse -import warnings -import matplotlib.pyplot as plt -from scipy.interpolate import InterpolatedUnivariateSpline -from datetime import datetime -from scipy import stats -from mpl_toolkits.axes_grid1.inset_locator import inset_axes - -""" - Program for computing satellite crossover differences from ascending and descending orbit paths using linear or - cubic interpolation (linear is preferred) between the closest points around the crossover location. The input - needs to be separated into asc. and des. files and contain (minimum) orbit number, lon, lat, time, height. The user - can further provide three extra variables to be crossed (original set up for radar altimetry waveform corrections), - allowing the user to cross in the same run a maximum of four variables. The software uses internal tiling to speed - up computation of the crossovers, specified by the user, it further can process external tiles using parallel - processing. For external processing please use "tile.py" with a provided extent, as the program uses the tile - numbering to determine which tiles should be crossed together. To further speed up processing the user can - down-sample the tracks, allowing for a faster computation of the crossing point (the difference is computed using - the full track sampling). - - Please type "xover.py -h" for help with input! - - Example: - - python ./xover.py a.h5 d.h5 -o xover.h5 -r 350 -p 3031 -d 20 -k 3 -m linear -v orb lon lat time height na na na -q - - Written by Johan Nilsson, with large contributions by Fernando Paolo. - NASA Jet Propulsion Laboratory 2018 - -""" - -# Ignore all warnings -warnings.filterwarnings("ignore") - -def get_args(): - """ Get command-line arguments. """ - parser = argparse.ArgumentParser( - description='Program for computing satellite/airborne crossovers.') - parser.add_argument( - 'input', metavar='ifile', type=str, nargs=2, - help='name of two input files to cross (HDF5)') - parser.add_argument( - '-o', metavar='ofile', dest='output', type=str, nargs=1, - help='name of output file (HDF5)', - default=[None]) - parser.add_argument( - '-r', metavar=('radius'), dest='radius', type=float, nargs=1, - help='maximum interpolation distance from crossing location (m)', - default=[350],) - parser.add_argument( - '-p', metavar=('epsg_num'), dest='proj', type=str, nargs=1, - help=('projection: EPSG number (AnIS=3031, GrIS=3413)'), - default=['4326'],) - parser.add_argument( - '-d', metavar=('tile_size'), dest='dxy', type=int, nargs=1, - help='tile size (km)', - default=[None],) - parser.add_argument( - '-k', metavar=('na','nd'), dest='nres', type=int, nargs=2, - help='along-track subsampling every k:th pnt for each file', - default=[1],) - parser.add_argument( - '-b', metavar=('buffer'), dest='buff', type=int, nargs=1, - help=('tile buffer (km)'), - default=[0],) - parser.add_argument( - '-m', metavar=None, dest='mode', type=str, nargs=1, - help='interpolation method, "linear" or "cubic"', - choices=('linear', 'cubic'), default=['linear'],) - parser.add_argument( - '-v', metavar=('o','x','y','t','h','b','l','t'), dest='vnames', type=str, nargs=8, - help=('main vars: names if HDF5, orbit/lon/lat/time/height/bs/lew/tes'), - default=[None],) - parser.add_argument( - '-t', metavar=('t1','t2'), dest='tspan', type=float, nargs=2, - help='only compute crossovers for given time span', - default=[None,None],) - parser.add_argument( - '-f', dest='tile', action='store_true', - help=('run in tile mode'), - default=False) - parser.add_argument( - '-q', dest='plot', action='store_true', - help=('plot for inspection'), - default=False) - parser.add_argument( - '-n', metavar=('njobs'), dest='njobs', type=int, nargs=1, - help="for parallel processing of multiple files, optional", - default=[1],) - parser.add_argument( - '-i', dest='diff', action='store_true', - help=('do not interpolate vars just take diff'), - default=False) - - return parser.parse_args() - - -def intersect(x_down, y_down, x_up, y_up): - """ Find orbit crossover locations. """ - - p = np.column_stack((x_down, y_down)) - q = np.column_stack((x_up, y_up)) - - (p0, p1, q0, q1) = p[:-1], p[1:], q[:-1], q[1:] - rhs = q0 - p0[:, np.newaxis, :] - - mat = np.empty((len(p0), len(q0), 2, 2)) - mat[..., 0] = (p1 - p0)[:, np.newaxis] - mat[..., 1] = q0 - q1 - mat_inv = -mat.copy() - mat_inv[..., 0, 0] = mat[..., 1, 1] - mat_inv[..., 1, 1] = mat[..., 0, 0] - - det = mat[..., 0, 0] * mat[..., 1, 1] - mat[..., 0, 1] * mat[..., 1, 0] - mat_inv /= det[..., np.newaxis, np.newaxis] - - import numpy.core.umath_tests as ut - - params = ut.matrix_multiply(mat_inv, rhs[..., np.newaxis]) - intersection = np.all((params >= 0) & (params <= 1), axis=(-1, -2)) - p0_s = params[intersection, 0, :] * mat[intersection, :, 0] - - return p0_s + p0[np.where(intersection)[0]] - - -def transform_coord(proj1, proj2, x, y): - """ Transform coordinates from proj1 to proj2 (EPSG num). """ - - # Set full EPSG projection strings - proj1 = pyproj.Proj("+init=EPSG:"+str(proj1)) - proj2 = pyproj.Proj("+init=EPSG:"+str(proj2)) - - # Convert coordinates - return pyproj.transform(proj1, proj2, x, y) - - -def get_bboxs_old(xmin, xmax, ymin, ymax, dxy): - """ - Define blocks (bbox) for speeding up the processing. - - Args: - xmin/xmax/ymin/ymax: must be in grid projection: stereographic (m). - dxy: grid-cell size. - """ - # Number of tile edges on each dimension - Nns = int(np.abs(ymax - ymin) / dxy) + 1 - New = int(np.abs(xmax - xmin) / dxy) + 1 - - # Coord of tile edges for each dimension - xg = np.linspace(xmin, xmax, New) - yg = np.linspace(ymin, ymax, Nns) - - # Vector of bbox for each cell - bboxs = [(w,e,s,n) for w,e in zip(xg[:-1], xg[1:]) - for s,n in zip(yg[:-1], yg[1:])] - del xg, yg - - return bboxs - - -def get_bboxs(x, y, xmin, xmax, ymin, ymax, dxy, buff): - """ - Define blocks (bbox) for speeding up the processing. - - Args: - xmin/xmax/ymin/ymax: must be in grid projection: stereographic (m). - dxy: grid-cell size. - """ - - # Number of tile edges on each dimension - Nns = int(np.abs(ymax - ymin) / dxy) + 1 - New = int(np.abs(xmax - xmin) / dxy) + 1 - - # Coord of tile edges for each dimension - xg = np.linspace(xmin-buff, xmax+buff, New) - yg = np.linspace(ymin-buff, ymax+buff, Nns) - - # Indicies for each grid-cell - bboxs = stats.binned_statistic_2d(x, y, None,'count', bins=[xg,yg]).binnumber - - return bboxs - -def mad_std(x, axis=None): - """ Robust standard deviation (using MAD). """ - return 1.4826 * np.nanmedian(np.abs(x - np.nanmedian(x, axis)), axis) - - -def interp1D(x, y, xi, n=1): - """ 1D interpolation """ - - # Sort data - idx = np.argsort(x) - - # Sort arrays - x, y = x[idx], y[idx] - - # Create interpolator - Fi = InterpolatedUnivariateSpline(x, y, k=n) - - # Interpolated value - yi = Fi(xi) - - return yi - -def tile_num(fname): - """ Extract tile number from file name. """ - l = os.path.splitext(fname)[0].split('_') # fname -> list - i = l.index('tile') - return int(l[i+1]) - -def match_tiles(str1,str2,key): - """ Matches tile indices """ - - # Get file names - files1 = glob.glob(str1) - files2 = glob.glob(str2) - - # Create output list - f1out = [] - f2out = [] - - # Loop trough files-1 - for file1 in files1: - - # Get tile index - f1 = tile_num(file1) - - # Loop trough files-2 - for file2 in files2: - - # Get tile index - f2 = tile_num(file2) - - # Check if tiles have same index - if f1 == f2: - - # Save if true - f1out.append(file1) - f2out.append(file2) - break - - return f1out, f2out - -# Read in parameters -args = get_args() -ifiles = args.input[:] -ofile_ = args.output[0] -radius = args.radius[0] -proj = args.proj[0] -dxy = args.dxy[0] -nres_a = args.nres[0] -nres_d = args.nres[1] -buff = args.buff[0] -mode = args.mode[0] -vnames = args.vnames[:] -tspan = args.tspan[:] -njobs = args.njobs[0] -tile = args.tile -plot = args.plot -diff = args.diff - -print('parameters:') -for arg in vars(args).items(): print(arg) - -# Get variable names -ovar, xvar, yvar, tvar, zvar, bvar, lvar, svar = vnames - -# Create output names -oovar_a = ovar+'_1' -oovar_d = ovar+'_2' -oxvar_x = xvar -oyvar_x = yvar -otvar_a = tvar+'_1' -otvar_d = tvar+'_2' -otvar_x = 'd'+tvar -ozvar_a = zvar+'_1' -ozvar_d = zvar+'_2' -ozvar_x = 'd'+zvar - -obvar_a = bvar+'_1' -obvar_d = bvar+'_2' -obvar_x = 'd'+bvar -olvar_a = lvar+'_1' -olvar_d = lvar+'_2' -olvar_x = 'd'+lvar -osvar_a = svar+'_1' -osvar_d = svar+'_2' -osvar_x = 'd'+svar - -# Test names -if ozvar_x == osvar_x or ozvar_x == obvar_x or ozvar_x == olvar_x: - print("****************************************************************") - print("Extra parameters can't have the same name as the main parameter!") - print("****************************************************************") - sys.exit() - -# Test for stereographic -if proj != "4326" and dxy is not None: - - # Convert to meters - dxy *= 1e3 - -def main(ifile1, ifile2): - """ Find and compute crossover values. """ - - # Start time of program - startTime = datetime.now() - - print('crossing files:', ifile1, ifile2, '...') - - # Load all 1d variables needed - with h5py.File(ifile1, 'r') as f1, \ - h5py.File(ifile2, 'r') as f2: - - # File 1 - orbit1 = f1[ovar][:] - lon1 = f1[xvar][:] - lat1 = f1[yvar][:] - time1 = f1[tvar][:] - height1 = f1[zvar][:] - bs1 = f1[bvar][:] if bvar in f1 else np.zeros(lon1.shape) - lew1 = f1[lvar][:] if lvar in f1 else np.zeros(lon1.shape) - tes1 = f1[svar][:] if svar in f1 else np.zeros(lon1.shape) - - # File 2 - orbit2 = f2[ovar][:] - lon2 = f2[xvar][:] - lat2 = f2[yvar][:] - time2 = f2[tvar][:] - height2 = f2[zvar][:] - bs2 = f2[bvar][:] if bvar in f2 else np.zeros(lon2.shape) - lew2 = f2[lvar][:] if lvar in f2 else np.zeros(lon2.shape) - tes2 = f2[svar][:] if svar in f2 else np.zeros(lon2.shape) - - # File 1 - orbit1 = orbit1[~np.isnan(height1)] - lon1 = lon1[~np.isnan(height1)] - lat1 = lat1[~np.isnan(height1)] - time1 = time1[~np.isnan(height1)] - bs1 = bs1[~np.isnan(height1)] - lew1 = lew1[~np.isnan(height1)] - tes1 = tes1[~np.isnan(height1)] - height1 = height1[~np.isnan(height1)] - - # File 2 - orbit2 = orbit2[~np.isnan(height2)] - lon2 = lon2[~np.isnan(height2)] - lat2 = lat2[~np.isnan(height2)] - time2 = time2[~np.isnan(height2)] - bs2 = bs2[~np.isnan(height2)] - lew2 = lew2[~np.isnan(height2)] - tes2 = tes2[~np.isnan(height2)] - height2 = height2[~np.isnan(height2)] - - # Add scattering correction for radar if available - try: - # Read in correction - h_c1 = f1['h_bs'][:] - h_c2 = f2['h_bs'][:] - - # Set NaN's to zero - h_c1[np.isnan(h_c1)] = 0 - h_c2[np.isnan(h_c2)] = 0 - - # Subtract correction for scattering - height1 -= h_c1 - height2 -= h_c2 - print('Scattering correction added!') - - except: - pass - - # Set flags for extra parameters - if np.all(bs1 == 0): - flag_bs = False - else: - flag_bs = True - if np.all(lew1 == 0): - flag_le = False - else: - flag_le = True - if np.all(tes1 == 0): - flag_ts = False - else: - flag_ts = True - - # If time span given, filter out invalid data - if tspan[0] != None: - - t1, t2 = tspan - - idx, = np.where((time1 >= t1) & (time1 <= t2)) - orbit1 = orbit1[idx] - lon1 = lon1[idx] - lat1 = lat1[idx] - time1 = time1[idx] - height1 = height1[idx] - bs1 = bs1[idx] - lew1 = lew1[idx] - tes1 = tes1[idx] - - idx, = np.where((time2 >= t1) & (time2 <= t2)) - orbit2 = orbit2[idx] - lon2 = lon2[idx] - lat2 = lat2[idx] - time2 = time2[idx] - height2 = height2[idx] - bs2 = bs2[idx] - lew2 = lew2[idx] - tes2 = tes2[idx] - - if len(time1) < 3 or len(time2) < 3: - print('there are no points within time-span specified!') - sys.exit() - - # Transform to wanted coordinate system - (xp1, yp1) = transform_coord(4326, proj, lon1, lat1) - (xp2, yp2) = transform_coord(4326, proj, lon2, lat2) - - # Time limits: the largest time span (yr) - tmin = min(np.nanmin(time1), np.nanmin(time2)) - tmax = max(np.nanmax(time1), np.nanmax(time2)) - - # Boundary limits: the smallest spatial domain (m) - xmin = max(np.nanmin(xp1), np.nanmin(xp2)) - xmax = min(np.nanmax(xp1), np.nanmax(xp2)) - ymin = max(np.nanmin(yp1), np.nanmin(yp2)) - ymax = min(np.nanmax(yp1), np.nanmax(yp2)) - - # Interpolation type and number of needed points - if mode == "linear": - - # Linear interpolation - nobs = 2 - order = 1 - - else: - - # Cubic interpolation - nobs = 6 - order = 3 - - # Tiling option - "on" or "off" - if dxy: - - print('tileing asc/des data!') - - # Get bounding box - bboxs1 = get_bboxs(xp1, yp1, xmin, xmax, ymin, ymax, dxy, buff*1e3) - bboxs2 = get_bboxs(xp2, yp2, xmin, xmax, ymin, ymax, dxy, buff*1e3) - - # Copy box for conviniance - bboxs = bboxs1 - - else: - - # Get bounding box from full domain - bboxs = [(xmin, xmax, ymin, ymax)] - - # Start time of program - startTime = datetime.now() - - #print 'number of sub-tiles:', len(np.unique(bboxs1)) - #print 'number of sub-tiles:', len(np.unique(bboxs1)) - - # Initiate output container (much larger than it needs to be) TAKES SOME TIME TO CREATE! - # out = np.full((2*len(orbit1)+2*len(orbit2), 12), np.nan) - out = [] - - # Initiate xover counter - i_xover = 0 - - # Counter - ki = 0 - - # Unique boxes - ibox = np.unique(bboxs) - - # Loop through each sub-tile - for k in ibox: - - # Get the tile indices - idx1, = np.where(bboxs1 == k) - idx2, = np.where(bboxs2 == k) - - # Extract tile data from each set - orbits1 = orbit1[idx1] - lons1 = lon1[idx1] - lats1 = lat1[idx1] - x1 = xp1[idx1] - y1 = yp1[idx1] - h1 = height1[idx1] - t1 = time1[idx1] - b1 = bs1[idx1] - l1 = lew1[idx1] - s1 = tes1[idx1] - - orbits2 = orbit2[idx2] - lons2 = lon2[idx2] - lats2 = lat2[idx2] - x2 = xp2[idx2] - y2 = yp2[idx2] - h2 = height2[idx2] - t2 = time2[idx2] - b2 = bs2[idx2] - l2 = lew2[idx2] - s2 = tes2[idx2] - - # Get unique orbits - orb_ids1 = np.unique(orbits1) - orb_ids2 = np.unique(orbits2) - - # Test if tile has no crossovers - if len(orbits1) == 0 or len(orbits2) == 0: - - # Go to next track - continue - - # Loop through orbits from file #1 - for orb_id1 in orb_ids1: - - # Index for single ascending orbit - i_trk1 = orbits1 == orb_id1 - - # Extract points from single orbit (a track) - xa = x1[i_trk1] - ya = y1[i_trk1] - ta = t1[i_trk1] - ha = h1[i_trk1] - ba = b1[i_trk1] - la = l1[i_trk1] - sa = s1[i_trk1] - - # Loop through tracks from file #2 - for orb_id2 in orb_ids2: - - # Index for single descending orbit - i_trk2 = orbits2 == orb_id2 - - # Extract single orbit - xb = x2[i_trk2] - yb = y2[i_trk2] - tb = t2[i_trk2] - hb = h2[i_trk2] - bb = b2[i_trk2] - lb = l2[i_trk2] - sb = s2[i_trk2] - - # Test length of vector - if len(xa) < 3 or len(xb) < 3: continue - - # Compute exact crossing - full set of observations, or every n:th point - cxy_main = intersect(xa[::nres_a], ya[::nres_a], xb[::nres_d], yb[::nres_d]) - - # Test again for crossing - if len(cxy_main) == 0: continue - - """ - SUPPORT SHOULD BE ADDED FOR MULTIPLE CROSSOVERS FOR SAME TRACK! - - """ - - # Extract crossing coordinates - xi = cxy_main[0][0] - yi = cxy_main[0][1] - - # Get start coordinates of orbits - xa0 = xa[0] - ya0 = ya[0] - xb0 = xb[0] - yb0 = yb[0] - - # Compute distance from crossing node to each arc - da = (xa - xi) * (xa - xi) + (ya - yi) * (ya - yi) - db = (xb - xi) * (xb - xi) + (yb - yi) * (yb - yi) - - # Sort according to distance - Ida = np.argsort(da) - Idb = np.argsort(db) - - # Sort arrays - A - xa = xa[Ida] - ya = ya[Ida] - ta = ta[Ida] - ha = ha[Ida] - da = da[Ida] - ba = ba[Ida] - la = la[Ida] - sa = sa[Ida] - - # Sort arrays - B - xb = xb[Idb] - yb = yb[Idb] - tb = tb[Idb] - hb = hb[Idb] - db = db[Idb] - bb = bb[Idb] - lb = lb[Idb] - sb = sb[Idb] - - # Get distance of four closest observations - dab = np.vstack((da[[0, 1]], db[[0, 1]])) - - # Test if any point is too far away - if np.any(np.sqrt(dab) > radius): - continue - # Test if enough obs. are available for interpolation - elif (len(xa) < nobs) or (len(xb) < nobs): - continue - else: - # Accepted - pass - - # Compute distance again from the furthest point - da0 = (xa - xa0) * (xa - xa0) + (ya - ya0) * (ya - ya0) - db0 = (xb - xb0) * (xb - xb0) + (yb - yb0) * (yb - yb0) - - # Compute distance again from the furthest point - dai = (xi - xa0) * (xi - xa0) + (yi - ya0) * (yi - ya0) - dbi = (xi - xb0) * (xi - xb0) + (yi - yb0) * (yi - yb0) - - # Interpolate height to crossover location - hai = interp1D(da0[0:nobs], ha[0:nobs], dai, order) - hbi = interp1D(db0[0:nobs], hb[0:nobs], dbi, order) - - # Interpolate time to crossover location - tai = interp1D(da0[0:nobs], ta[0:nobs], dai, order) - tbi = interp1D(db0[0:nobs], tb[0:nobs], dbi, order) - - # Test interpolate time values - if (tai > tmax) or (tai < tmin) or (tbi > tmax) or (tbi < tmin): - continue - - # Create output array - out_i = np.full(21, np.nan) - - # Degress of freedom - n_rms = np.sqrt(2) - - # Create RMSE of crossovers - rms_a = np.std(ha[0:nobs]) / n_rms - rms_d = np.std(hd[0:nobs]) / n_rms - - # Compute differences and save parameters - out_i[0] = xi - out_i[1] = yi - out_i[2] = hai - hbi - out_i[3] = tai - tbi - out_i[4] = tai - out_i[5] = tbi - out_i[6] = hai - out_i[7] = hbi - out_i[8] = (hai - hbi) / (tai - tbi) - out_i[18] = orb_id1 - out_i[19] = orb_id2 - out_i[20] = np.sqrt(rms_a**2 + rms_d**2) - - # Test for more parameters to difference - if flag_bs: - - if diff is True: - - # Save paramters - bai = ba[0] - bbi = bb[0] - - # Save difference - out_i[9] = bai - bbi - out_i[10] = bai - out_i[11] = bbi - - else: - - # Interpolate sigma0 to crossover location - bai = interp1D(da0[0:nobs], ba[0:nobs], order) - bbi = interp1D(db0[0:nobs], bb[0:nobs], order) - - # Save difference - out_i[9] = bai - bbi - out_i[10] = bai - out_i[11] = bbi - - if flag_le: - - if diff is True: - - # Get paramters - lai = la[0] - lbi = lb[0] - - # Save difference - out_i[9] = lai - lbi - out_i[10] = lai - out_i[11] = lbi - - else: - - # Interpolate leading edge width to crossover location - lai = interp1D(da0[0:nobs], la[0:nobs], order) - lbi = interp1D(db0[0:nobs], lb[0:nobs], order) - - # Save difference - out_i[12] = lai - lbi - out_i[13] = lai - out_i[14] = lbi - - if flag_ts: - - if diff is True: - - # Get parameters - sai = sa[0] - sbi = sb[0] - - # Save difference - out_i[15] = sai - sbi - out_i[16] = sai - out_i[17] = sbi - - else: - - # Interpolate trailing edge slope to crossover location - sai = interp1D(da0[0:nobs], sa[0:nobs], order) - sbi = interp1D(db0[0:nobs], sb[0:nobs], order) - - # Save difference - out_i[15] = sai - sbi - out_i[16] = sai - out_i[17] = sbi - - # Add to list - out.append(out_i) - - # Operating on current tile - # print 'tile:', ki, len(ibox) - - # Update counter - ki += 1 - - # Change back to numpy array - out = np.asarray(out) - - # Remove invalid rows - out = out[~np.isnan(out[:,2]),:] - - # Test if output container is empty - if len(out) == 0: - print('no crossovers found!') - return - - # Remove the two id columns if they are empty - out = out[:,:-2] if np.isnan(out[:,-1]).all() else out - - xs, ys = out[:,0].copy(), out[:,1].copy() - - # Transform coords back to lat/lon - out[:,0], out[:,1] = transform_coord(proj, '4326', out[:,0], out[:,1]) - - # Create output file name if not given - if ofile_ is None: - path, ext = os.path.splitext(ifile1) - if tile: - tilenum = str(tile_num(ifile1)) - else: - tilenum = '' - ofile = path + 'xovers_'+tilenum + ext - else: - ofile = ofile_ - - # Create h5 file - with h5py.File(ofile, 'w') as f: - - # Add standard parameters - f[oxvar_x] = out[:,0] - f[oyvar_x] = out[:,1] - f[ozvar_x] = out[:,2] - f[otvar_x] = out[:,3] - f[otvar_a] = out[:,4] - f[otvar_d] = out[:,5] - f[ozvar_a] = out[:,6] - f[ozvar_d] = out[:,7] - f[oovar_a] = out[:,18] - f[oovar_d] = out[:,19] - f['dhdt'] = out[:,8] - f['rmse'] = out[20] - - # Add extra parameters - if flag_bs: - f[obvar_x] = out[:,9] - f[obvar_a] = out[:,10] - f[obvar_d] = out[:,11] - if flag_le: - f[olvar_x] = out[:,12] - f[olvar_a] = out[:,13] - f[olvar_d] = out[:,14] - if flag_ts: - f[osvar_x] = out[:,15] - f[osvar_a] = out[:,16] - f[osvar_d] = out[:,17] - - # Stat. variables - dh = out[:,2] - dt = out[:,3] - dhdt = out[:,8] - - # Compute statistics - med0 = np.around(np.median(dh[np.abs(dt)<=1./12]),3) - std0 = np.around(mad_std(dh[np.abs(dt)<=1./12]),3) - med1 = np.around(np.median(dhdt),3) - std1 = np.around(mad_std(dhdt),3) - - # Print some statistics to screen - print('') - print('execution time: ' + str(datetime.now() - startTime)) - print('number of crossovers found:',str(len(out))) - print('statistics -> mean:',med0,'std.dev:',std0, '(m) (dt<30d)') - print('statistics -> mean:',med1,'std.dev:',std1, '(dvar/yr)') - print('ofile name ->', ofile) - - if plot: - - # Some light filtering for plotting - io = np.abs(dh) < 3.0*mad_std(dh) - - gridsize = (3, 2) - fig = plt.figure(figsize=(12, 8)) - ax1 = plt.subplot2grid(gridsize, (0, 0), colspan=2, rowspan=2) - ax2 = plt.subplot2grid(gridsize, (2, 0)) - ax3 = plt.subplot2grid(gridsize, (2, 1)) - # Plot main difference variable - ax1.scatter(xs*1e-3,ys*1e-3,s=10,c=dh,cmap='jet',vmin=-20,vmax=20) - #plt.subplot(211) - ax2.plot(dt[io], dh[io], '.') - ax2.set_ylabel(ozvar_x) - ax2.set_xlabel(otvar_x) - #plt.subplot(212) - ax3.hist(dh[io],50) - ax3.set_ylabel('Frequency') - ax3.set_xlabel(ozvar_x) - #plt.subplots_adjust(hspace=.5) - - plt.show() - -"""" -if __name__ == '__main__': - - # Read file names - str1, str2 = ifiles - - # Check for tile mode - if tile: - - # Get matching tiles - files1, files2 = match_tiles(str1, str2, 'tile') - - # Loop trough tiles - for i in xrange(len(files1)): - - # Run main program - main(files1[i], files2[i]) - - # Run as single files - else: - - # File names - file1, file2 = str1, str2 - - # Run main program - main(file1, file2) -""" - -# Run main program -if njobs == 1 and tile is False: - - # File names - file1, file2 = ifiles - - # Run main - print('running sequential code ...') - main(file1, file2) - -else: - - print('running parallel code for tiles (%d jobs) ...' % njobs) - from joblib import Parallel, delayed - - # Get the file-names - str1, str2 = ifiles - - # Get matching tiles - files1, files2 = match_tiles(str1, str2, 'tile') - - # Run tiles in parallel - Parallel(n_jobs=njobs, verbose=5)(delayed(main)(files1[i],files2[i], n) for i in range(len(files1))) - - - - - - - - - - - -