Skip to content

Commit

Permalink
Feature #2781 Convert MET NetCDF point obs to Pandas DataFrame (#2877)
Browse files Browse the repository at this point in the history
* Per #2781, added function to convert MET NetCDF point observation data to pandas so it can be read and modified in a python embedding script. Added example python embedding script

* ignore python cache files

* fixed function call

* reduce cognitive complexity to satisfy SonarQube and add boolean return value to catch if function fails to read data

* clean up script and add comments

* replace call to object function that doesn't exist, handle exception when file passed to script cannot be read by the NetCDF library

* rename example script

* add new example script to makefiles

* fix logic to build pandas DataFrame to properly get header information from observation header IDs

* Per #2781, add unit test to demonstrate python embedding script that reads MET NetCDF point observation file and converts it to a pandas DataFrame

* Per #2781, added init function for nc_point_obs to take an input filename. Also raise TypeError exception from nc_point_obs.read_data() if input file cannot be read

* call parent class init function to properly initialize nc_point_obs
  • Loading branch information
georgemccabe authored May 8, 2024
1 parent 542b4ba commit 79ac568
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 58 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ make.log
make_install.log
.idea
cmake-build-debug

__pycache__
14 changes: 14 additions & 0 deletions internal/test_unit/xml/unit_python.xml
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,20 @@
</output>
</test>

<test name="python_plot_point_obs_met_nc_to_pandas">
<exec>&MET_BIN;/plot_point_obs</exec>
<param> \
'PYTHON_NUMPY=&MET_BASE;/python/examples/read_met_point_obs_pandas.py &OUTPUT_DIR;/pb2nc/ndas.20120409.t12z.prepbufr.tm00.nc' \
&OUTPUT_DIR;/python/ndas.20120409.t12z.prepbufr.tm00.nr_met_nc_to_pandas.ps \
-data_file &DATA_DIR_MODEL;/grib2/nam/nam_2012040900_F012.grib2 \
-dotsize 2.0 \
-v 1
</param>
<output>
<ps>&OUTPUT_DIR;/python/ndas.20120409.t12z.prepbufr.tm00.nr_met_nc_to_pandas.ps</ps>
</output>
</test>

<test name="python_ensemble_stat_FILE_LIST">
<exec>echo "&DATA_DIR_MODEL;/grib1/arw-fer-gep1/arw-fer-gep1_2012040912_F024.grib \
&DATA_DIR_MODEL;/grib1/arw-fer-gep5/arw-fer-gep5_2012040912_F024.grib \
Expand Down
3 changes: 2 additions & 1 deletion scripts/python/examples/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ pythonexamples_DATA = \
read_ascii_numpy.py \
read_ascii_point.py \
read_ascii_xarray.py \
read_met_point_obs.py
read_met_point_obs.py \
read_met_point_obs_pandas.py

EXTRA_DIST = ${pythonexamples_DATA}

Expand Down
3 changes: 2 additions & 1 deletion scripts/python/examples/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,8 @@ pythonexamples_DATA = \
read_ascii_numpy.py \
read_ascii_point.py \
read_ascii_xarray.py \
read_met_point_obs.py
read_met_point_obs.py \
read_met_point_obs_pandas.py

EXTRA_DIST = ${pythonexamples_DATA}
MAINTAINERCLEANFILES = Makefile.in
Expand Down
52 changes: 52 additions & 0 deletions scripts/python/examples/read_met_point_obs_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import os
import sys

from met.point_nc import nc_point_obs

# Description: Reads a point observation NetCDF file created by MET and passes
# the data to another MET tool via Python Embedding. This script can be copied
# to perform modifications to the data before it is passed to MET.
# Example: plot_point_obs "PYTHON_NUMPY=pyembed_met_point_nc.py in.nc" out.ps
# Contact: George McCabe <[email protected]>

# Read and format the input 11-column observations:
# (1) string: Message_Type
# (2) string: Station_ID
# (3) string: Valid_Time(YYYYMMDD_HHMMSS)
# (4) numeric: Lat(Deg North)
# (5) numeric: Lon(Deg East)
# (6) numeric: Elevation(msl)
# (7) string: Var_Name(or GRIB_Code)
# (8) numeric: Level
# (9) numeric: Height(msl or agl)
# (10) string: QC_String
# (11) numeric: Observation_Value

print(f"Python Script:\t{sys.argv[0]}")

if len(sys.argv) != 2:
print("ERROR: pyembed_met_point_nc.py -> Specify only 1 input file")
sys.exit(1)

# Read the input file as the first argument
input_file = os.path.expandvars(sys.argv[1])
print("Input File:\t" + repr(input_file))

# Read MET point observation NetCDF file
try:
point_obs = nc_point_obs(input_file)
except TypeError:
print(f"ERROR: Could not read MET point data file {input_file}")
sys.exit(1)

# convert point observation data to a pandas DataFrame
df = point_obs.to_pandas()

##################################################
# perform any modifications to the data here #
##################################################

# convert pandas DataFrame to list format that is expected by MET
point_data = df.values.tolist()
print(f" point_data: Data Length:\t{len(point_data)}")
print(f" point_data: Data Type:\t{type(point_data)}")
2 changes: 1 addition & 1 deletion scripts/python/met/point.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def check_point_data(self):
# return met_point_tools.convert_to_ndarray(value_list)

def dump(self):
met_base_point.print_point_data(self.get_point_data())
met_point_tools.print_point_data(self.get_point_data())

def get_count_string(self):
return f' nobs={self.nobs} nhdr={self.nhdr} ntyp={self.nhdr_typ} nsid={self.nhdr_sid} nvld={self.nhdr_vld} nqty={self.nobs_qty} nvar={self.nobs_var}'
Expand Down
136 changes: 81 additions & 55 deletions scripts/python/met/point_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
'''

import sys
import os

import numpy as np
import netCDF4 as nc
import pandas as pd

from met.point import met_point_obs, met_point_tools

Expand Down Expand Up @@ -53,6 +55,11 @@ def get_string_array(nc_group, var_name):

class nc_point_obs(met_point_obs):

def __init__(self, nc_filename=None):
super().__init__()
if nc_filename:
self.read_data(nc_filename)

# args should be string, list, or dictionary
def get_nc_filename(self, args):
nc_filename = None
Expand All @@ -67,62 +74,65 @@ def get_nc_filename(self, args):

def read_data(self, nc_filename):
method_name = f"{self.__class__.__name__}.read_data()"
if nc_filename is None:
self.log_error_msg(f"{method_name} The input NetCDF filename is missing")
elif not os.path.exists(nc_filename):
self.log_error_msg(f"{method_name} input NetCDF file ({nc_filename}) does not exist")
else:
if not nc_filename:
raise TypeError(f"{method_name} The input NetCDF filename is missing")
if not os.path.exists(nc_filename):
raise TypeError(f"{method_name} input NetCDF file ({nc_filename}) does not exist")

try:
dataset = nc.Dataset(nc_filename, 'r')
except OSError:
raise TypeError(f"{method_name} Could not open NetCDF file ({nc_filename}")

attr_name = 'use_var_id'
use_var_id_str = dataset.getncattr(attr_name) if attr_name in dataset.ncattrs() else "false"
self.use_var_id = use_var_id_str.lower() == 'true'

# Header
self.hdr_typ = dataset['hdr_typ'][:]
self.hdr_sid = dataset['hdr_sid'][:]
self.hdr_vld = dataset['hdr_vld'][:]
self.hdr_lat = dataset['hdr_lat'][:]
self.hdr_lon = dataset['hdr_lon'][:]
self.hdr_elv = dataset['hdr_elv'][:]
self.hdr_typ_table = met_point_nc_tools.get_string_array(dataset, 'hdr_typ_table')
self.hdr_sid_table = met_point_nc_tools.get_string_array(dataset, 'hdr_sid_table')
self.hdr_vld_table = met_point_nc_tools.get_string_array(dataset, 'hdr_vld_table')

nc_var = dataset.variables.get('obs_unit', None)
if nc_var:
self.obs_var_unit = nc_var[:]
nc_var = dataset.variables.get('obs_desc', None)
if nc_var:
self.obs_var_desc = nc_var[:]

nc_var = dataset.variables.get('hdr_prpt_typ', None)
if nc_var:
self.hdr_prpt_typ = nc_var[:]
nc_var = dataset.variables.get('hdr_irpt_typ', None)
if nc_var:
self.hdr_irpt_typ = nc_var[:]
nc_var = dataset.variables.get('hdr_inst_typ', None)
if nc_var:
self.hdr_inst_typ =nc_var[:]

#Observation data
self.hdr_sid = dataset['hdr_sid'][:]
self.obs_qty = np.array(dataset['obs_qty'][:])
self.obs_hid = np.array(dataset['obs_hid'][:])
self.obs_lvl = np.array(dataset['obs_lvl'][:])
self.obs_hgt = np.array(dataset['obs_hgt'][:])
self.obs_val = np.array(dataset['obs_val'][:])
nc_var = dataset.variables.get('obs_vid', None)
if nc_var is None:
self.use_var_id = False
nc_var = dataset.variables.get('obs_gc', None)
else:
self.obs_var_table = met_point_nc_tools.get_string_array(dataset, 'obs_var')
if nc_var:
self.obs_vid = np.array(nc_var[:])

attr_name = 'use_var_id'
use_var_id_str = dataset.getncattr(attr_name) if attr_name in dataset.ncattrs() else "false"
self.use_var_id = use_var_id_str.lower() == 'true'

# Header
self.hdr_typ = dataset['hdr_typ'][:]
self.hdr_sid = dataset['hdr_sid'][:]
self.hdr_vld = dataset['hdr_vld'][:]
self.hdr_lat = dataset['hdr_lat'][:]
self.hdr_lon = dataset['hdr_lon'][:]
self.hdr_elv = dataset['hdr_elv'][:]
self.hdr_typ_table = met_point_nc_tools.get_string_array(dataset, 'hdr_typ_table')
self.hdr_sid_table = met_point_nc_tools.get_string_array(dataset, 'hdr_sid_table')
self.hdr_vld_table = met_point_nc_tools.get_string_array(dataset, 'hdr_vld_table')

nc_var = dataset.variables.get('obs_unit', None)
if nc_var:
self.obs_var_unit = nc_var[:]
nc_var = dataset.variables.get('obs_desc', None)
if nc_var:
self.obs_var_desc = nc_var[:]

nc_var = dataset.variables.get('hdr_prpt_typ', None)
if nc_var:
self.hdr_prpt_typ = nc_var[:]
nc_var = dataset.variables.get('hdr_irpt_typ', None)
if nc_var:
self.hdr_irpt_typ = nc_var[:]
nc_var = dataset.variables.get('hdr_inst_typ', None)
if nc_var:
self.hdr_inst_typ =nc_var[:]

#Observation data
self.hdr_sid = dataset['hdr_sid'][:]
self.obs_qty = np.array(dataset['obs_qty'][:])
self.obs_hid = np.array(dataset['obs_hid'][:])
self.obs_lvl = np.array(dataset['obs_lvl'][:])
self.obs_hgt = np.array(dataset['obs_hgt'][:])
self.obs_val = np.array(dataset['obs_val'][:])
nc_var = dataset.variables.get('obs_vid', None)
if nc_var is None:
self.use_var_id = False
nc_var = dataset.variables.get('obs_gc', None)
else:
self.obs_var_table = met_point_nc_tools.get_string_array(dataset, 'obs_var')
if nc_var:
self.obs_vid = np.array(nc_var[:])

self.obs_qty_table = met_point_nc_tools.get_string_array(dataset, 'obs_qty_table')
self.obs_qty_table = met_point_nc_tools.get_string_array(dataset, 'obs_qty_table')

def save_ncfile(self, nc_filename):
met_data = self.get_point_data()
Expand Down Expand Up @@ -274,6 +284,22 @@ def write_nc_data(nc_dataset, point_obs):
print(f' === ERROR at {method_name} type(nc_dataset)={type(nc_dataset)} type(point_obs)={type(point_obs)}')
raise

def to_pandas(self):
return pd.DataFrame({
'typ': [self.hdr_typ_table[self.hdr_typ[i]] for i in self.obs_hid],
'sid': [self.hdr_sid_table[self.hdr_sid[i]] for i in self.obs_hid],
'vld': [self.hdr_vld_table[self.hdr_vld[i]] for i in self.obs_hid],
'lat': [self.hdr_lat[i] for i in self.obs_hid],
'lon': [self.hdr_lon[i] for i in self.obs_hid],
'elv': [self.hdr_elv[i] for i in self.obs_hid],
'var': [self.obs_var_table[i] if self.use_var_id else f'{i}'
for i in self.obs_vid],
'lvl': self.obs_lvl,
'hgt': self.obs_hgt,
'qc': [np.nan if np.ma.is_masked(i) else self.obs_qty_table[i]
for i in self.obs_qty],
'obs': self.obs_val,
})

def main(argv):
if len(argv) != 1 and argv[1] != ARG_PRINT_DATA:
Expand All @@ -289,5 +315,5 @@ def main(argv):
point_obs_data.print_point_data(met_point_data)

if __name__ == '__main__':
main()
main(sys.argv)
print('Done python script')

0 comments on commit 79ac568

Please sign in to comment.