From 4b2e57eaed7e1070b3d7560aa38822a39d118e87 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 30 Aug 2023 17:52:49 +0200 Subject: [PATCH 1/3] Add ruff and pre-commit config and fix issues --- .pre-commit-config.yaml | 16 ++++ isimip_qc/checks/3d.py | 14 ++- isimip_qc/checks/__init__.py | 2 +- isimip_qc/checks/attributes.py | 16 ++-- isimip_qc/checks/dataset.py | 10 +- isimip_qc/checks/dimensions.py | 30 +++--- isimip_qc/checks/variables/latlon.py | 69 +++++++++----- isimip_qc/checks/variables/time.py | 25 +++-- isimip_qc/checks/variables/time_resolution.py | 30 ++++-- isimip_qc/checks/variables/var.py | 92 +++++++++++++------ isimip_qc/checks/variables/var3d.py | 25 +++-- isimip_qc/config.py | 3 +- isimip_qc/main.py | 12 ++- isimip_qc/models.py | 30 +++--- isimip_qc/utils/datamodel.py | 4 +- isimip_qc/utils/files.py | 9 +- pyproject.toml | 46 ++++++++++ 17 files changed, 299 insertions(+), 134 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7c46c13 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: + - repo: meta + hooks: + - id: check-hooks-apply + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-ast + - id: end-of-file-fixer + - id: trailing-whitespace + - id: debug-statements + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.284 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] diff --git a/isimip_qc/checks/3d.py b/isimip_qc/checks/3d.py index a670169..e8589d2 100644 --- a/isimip_qc/checks/3d.py +++ b/isimip_qc/checks/3d.py @@ -27,11 +27,13 @@ def check_3d(file): try: variable = file.dataset.variables.get(file.variable_name) if variable is None: - file.critical('Variable "%s" from file name not found inside the file! Check NetCDF header. Stopping tool!', file.variable_name) + file.critical('Variable "%s" from file name not found inside the file! Check NetCDF header. Stopping tool!', + file.variable_name) raise SystemExit(1) - except AttributeError: - file.critical('Variable "%s" from file name not found inside the file! Check NetCDF header. Stopping tool!', file.variable_name) - raise SystemExit(1) + except AttributeError as e: + file.critical('Variable "%s" from file name not found inside the file! Check NetCDF header. Stopping tool!', + file.variable_name) + raise SystemExit(1) from e definition = settings.DEFINITIONS.get('variable', {}).get(file.specifiers.get('variable')) if definition is None: @@ -66,7 +68,9 @@ def check_3d(file): for pos in range(0, file_dim_len): if variable.dimensions[pos] not in ['time', 'lat', 'lon']: break - file.critical('Variable "%s" has "time", "lat" or "lon" as second dependeny. Depencency order must be [time, %s, lat, lon]. "time" is first always.', file.variable_name, variable.dimensions[pos]) + file.critical('Variable "%s" has "time", "lat" or "lon" as second dependeny.' + ' Depencency order must be [time, %s, lat, lon]. "time" is first always.', + file.variable_name, variable.dimensions[pos]) file.dim_vertical = variable.dimensions[pos] else: file.dim_vertical = variable.dimensions[1] diff --git a/isimip_qc/checks/__init__.py b/isimip_qc/checks/__init__.py index 540761e..2dbec85 100644 --- a/isimip_qc/checks/__init__.py +++ b/isimip_qc/checks/__init__.py @@ -15,7 +15,7 @@ parts = file_path.relative_to(checks_dir).with_suffix('').parts module_name = 'isimip_qc.checks.{}'.format('.'.join(parts)) module = importlib.import_module(module_name) - functions += [function for function in inspect.getmembers(module, inspect.isfunction)] + functions += list(inspect.getmembers(module, inspect.isfunction)) # find checks checks = [function[1] for function in functions if function[0].startswith('check_')] diff --git a/isimip_qc/checks/attributes.py b/isimip_qc/checks/attributes.py index 99d578b..7fe1d89 100644 --- a/isimip_qc/checks/attributes.py +++ b/isimip_qc/checks/attributes.py @@ -1,10 +1,10 @@ import uuid -from email.utils import parseaddr from datetime import datetime +from email.utils import parseaddr from .. import __version__ from ..config import settings -from ..fixes import fix_set_global_attr, fix_remove_global_attr +from ..fixes import fix_remove_global_attr, fix_set_global_attr def check_isimip_id(file): @@ -47,11 +47,13 @@ def check_isimip_protocol_version(file): file.info('Global attribute "isimip_protocol_version" matches current protocol version (%s).', version) else: - file.info('Global attribute "isimip_protocol_version" (%s) does not match tool current protocol version (%s).', - version, protocol_version, fix={ - 'func': fix_set_global_attr, - 'args': (file, 'isimip_protocol_version', protocol_version) - }) + file.info( + 'Global attribute "isimip_protocol_version" (%s) does not match tool current protocol version (%s).', + version, protocol_version, fix={ + 'func': fix_set_global_attr, + 'args': (file, 'isimip_protocol_version', protocol_version) + } + ) except AttributeError: file.info('Global attribute "isimip_protocol_version" not yet set.', fix={ diff --git a/isimip_qc/checks/dataset.py b/isimip_qc/checks/dataset.py index 39271a8..403954c 100644 --- a/isimip_qc/checks/dataset.py +++ b/isimip_qc/checks/dataset.py @@ -1,5 +1,4 @@ -from ..fixes import (fix_remove_variable_attr, fix_rename_dimension, - fix_rename_variable, fix_rename_variable_attr) +from ..fixes import fix_remove_variable_attr, fix_rename_dimension, fix_rename_variable, fix_rename_variable_attr def check_data_model(file): @@ -24,7 +23,8 @@ def check_zip(file): if zlib: complevel = variable.filters().get('complevel') if complevel < 4: - file.warn('Variable "%s" compression level is "%s". Should be >= 5.', file.variable_name, complevel, fix_datamodel=True) + file.warn('Variable "%s" compression level is "%s". Should be >= 5.', + file.variable_name, complevel, fix_datamodel=True) else: file.info('Variable "%s" compression level looks good (%s)', file.variable_name, complevel) else: @@ -54,7 +54,9 @@ def check_lower_case(file): for attr in variable.__dict__: if attr not in ['_FillValue']: - if attr not in ['axis', 'standard_name', 'long_name', 'calendar', 'missing_value', 'units', 'comment', 'enteric_infection', 'description', 'unit_conversion_info', 'positive', 'bounds', 'classes']: + if attr not in ['axis', 'standard_name', 'long_name', 'calendar', 'missing_value', + 'units', 'comment', 'enteric_infection', 'description', 'unit_conversion_info', + 'positive', 'bounds', 'classes']: file.warn('Attribute "%s" for variable "%s" is not needed.', attr, variable_name, fix={ 'func': fix_remove_variable_attr, 'args': (file, variable_name, attr) diff --git a/isimip_qc/checks/dimensions.py b/isimip_qc/checks/dimensions.py index 7097932..f38cc06 100644 --- a/isimip_qc/checks/dimensions.py +++ b/isimip_qc/checks/dimensions.py @@ -21,11 +21,12 @@ def check_lon_dimension(file): grid_info = settings.DEFINITIONS['climate_forcing'].get(climate_forcing)['grid']['lon_size'] try: lon_size = grid_info[sens_scenario] - except: + except KeyError: lon_size = grid_info['default'] if lon_size != file.dataset.dimensions.get('lon').size: - file.warn('Unexpected number of longitudes found (%s). Should be %s', file.dataset.dimensions.get('lon').size, lon_size) + file.warn('Unexpected number of longitudes found (%s). Should be %s', + file.dataset.dimensions.get('lon').size, lon_size) else: file.info('%s longitudes defined.', lon_size) @@ -50,11 +51,12 @@ def check_lat_dimension(file): grid_info = settings.DEFINITIONS['climate_forcing'].get(climate_forcing)['grid']['lat_size'] try: lat_size = grid_info[sens_scenario] - except: + except KeyError: lat_size = grid_info['default'] if lat_size != file.dataset.dimensions.get('lat').size: - file.warn('Unexpected number of latitudes found (%s). Should be %s', file.dataset.dimensions.get('lat').size, lat_size) + file.warn('Unexpected number of latitudes found (%s). Should be %s', + file.dataset.dimensions.get('lat').size, lat_size) else: file.info('%s latitudes defined.', lat_size) @@ -67,7 +69,8 @@ def check_time_dimension(file): def check_depth_dimension(file): if file.is_3d: if file.dataset.dimensions.get(file.dim_vertical) is None: - file.error('Valid 4th dimension is missing. Should be of of [depth, bins]. Found "%s" instead.', file.dim_vertical) + file.error('Valid 4th dimension is missing. Should be of of [depth, bins]. Found "%s" instead.', + file.dim_vertical) def check_dimensions(file): @@ -77,17 +80,22 @@ def check_dimensions(file): dim_len = len(variable.dimensions) if file.is_time_fixed: if variable.dimensions[0] != 'lat' or variable.dimensions[1] != 'lon': - file.error('Dimension order for variable "%s" is %s. Should be ["lat", "lon"].', file.variable_name, variable.dimensions) + file.error('Dimension order for variable "%s" is %s. Should be ["lat", "lon"].', + file.variable_name, variable.dimensions) else: - file.info('Dimensions for variable "%s" look good: %s.', file.variable_name, variable.dimensions) + file.info('Dimensions for variable "%s" look good: %s.', + file.variable_name, variable.dimensions) elif file.is_2d: if variable.dimensions[0] != 'time' or variable.dimensions[1] != 'lat' or variable.dimensions[2] != 'lon': - file.error('Dimension order for variable "%s" is %s. Should be ["time", "lat", "lon"].', file.variable_name, variable.dimensions) + file.error('Dimension order for variable "%s" is %s. Should be ["time", "lat", "lon"].', + file.variable_name, variable.dimensions) else: - file.info('Dimensions for variable "%s" look good: %s.', file.variable_name, variable.dimensions) + file.info('Dimensions for variable "%s" look good: %s.', + file.variable_name, variable.dimensions) elif file.is_3d: if variable.dimensions[0] != 'time' or variable.dimensions[2] != 'lat' or variable.dimensions[3] != 'lon': - file.error('Dimension order for variable "%s" is %s. Should be ["time", "%s" , "lat", "lon"].', file.variable_name, variable.dimensions, file.dim_vertical) + file.error('Dimension order for variable "%s" is %s. Should be ["time", "%s" , "lat", "lon"].', + file.variable_name, variable.dimensions, file.dim_vertical) else: file.info('Dimensions for variable "%s" look good.', file.variable_name) else: @@ -97,7 +105,7 @@ def check_dimensions(file): dimension_definition = settings.DEFINITIONS['dimensions'].get(dimension_name) if dimension_definition: - # size of lat and lon are checked above + # size of lat and lon are checked above if dimension_definition.get('specifier') not in ['lat', 'lon']: size = dimension_definition.get('size') if size and dimension.size != size: diff --git a/isimip_qc/checks/variables/latlon.py b/isimip_qc/checks/variables/latlon.py index 6304769..2214c38 100644 --- a/isimip_qc/checks/variables/latlon.py +++ b/isimip_qc/checks/variables/latlon.py @@ -1,4 +1,5 @@ import numpy as np + from isimip_qc.config import settings from isimip_qc.fixes import fix_set_variable_attr @@ -20,7 +21,8 @@ def check_latlon_variable(file): # check dtype dtypes = ['float32', 'float64'] if var.dtype not in dtypes: - file.warn('Data type of "%s" is "%s". Should be float or double (one of %s).', variable, var.dtype, dtypes) + file.warn('Data type of "%s" is "%s". Should be float or double (one of %s).', + variable, var.dtype, dtypes) # check axis axis = var_definition.get('axis') @@ -40,43 +42,61 @@ def check_latlon_variable(file): standard_name = var_definition.get('standard_name') try: if var.standard_name != standard_name: - file.warn('"standard_name" attribute of "%s" is "%s". Should be "%s".', variable, var.standard_name, standard_name, fix={ + file.warn( + '"standard_name" attribute of "%s" is "%s". Should be "%s".', + variable, var.standard_name, standard_name, fix={ + 'func': fix_set_variable_attr, + 'args': (file, variable, 'standard_name', standard_name) + } + ) + except AttributeError: + file.warn( + '"standard_name" attribute of "%s" is missing. Should be "%s".', + variable, standard_name, fix={ 'func': fix_set_variable_attr, 'args': (file, variable, 'standard_name', standard_name) - }) - except AttributeError: - file.warn('"standard_name" attribute of "%s" is missing. Should be "%s".', variable, standard_name, fix={ - 'func': fix_set_variable_attr, - 'args': (file, variable, 'standard_name', standard_name) - }) + } + ) # check long_name long_names = var_definition.get('long_names', []) try: if var.long_name not in long_names: - file.warn('"long_name" attribute of "%s" is %s". Should be in %s.', variable, var.long_name, long_names, fix={ + file.warn( + '"long_name" attribute of "%s" is %s". Should be in %s.', + variable, var.long_name, long_names, fix={ + 'func': fix_set_variable_attr, + 'args': (file, variable, 'long_name', long_names[0]) + } + ) + except AttributeError: + file.warn( + '"long_name" attribute of "%s" is missing. Should be "%s".', + variable, long_names[0], fix={ 'func': fix_set_variable_attr, 'args': (file, variable, 'long_name', long_names[0]) - }) - except AttributeError: - file.warn('"long_name" attribute of "%s" is missing. Should be "%s".', variable, long_names[0], fix={ - 'func': fix_set_variable_attr, - 'args': (file, variable, 'long_name', long_names[0]) - }) + } + ) # check units units = var_definition.get('units') try: if var.units != units: - file.warn('"units" attribute for "%s" is "%s". Should be "%s".', variable, var.units, units, fix={ + file.warn( + '"units" attribute for "%s" is "%s". Should be "%s".', + variable, var.units, units, fix={ + 'func': fix_set_variable_attr, + 'args': (file, variable, 'units', units) + } + ) + except AttributeError: + file.warn( + '"units" attribute for "%s" is missing. Should be "%s".', + variable, units, fix={ 'func': fix_set_variable_attr, 'args': (file, variable, 'units', units) - }) - except AttributeError: - file.warn('"units" attribute for "%s" is missing. Should be "%s".', variable, units, fix={ - 'func': fix_set_variable_attr, - 'args': (file, variable, 'units', units) - }) + } + ) if settings.SECTOR not in ['marine-fishery_regional', 'water_regional', 'lakes_local', 'forestry']: @@ -94,7 +114,7 @@ def check_latlon_variable(file): elif var.name == 'lon': minimum = grid_info['lon_min'][sens_scenario] maximum = grid_info['lon_max'][sens_scenario] - except: + except KeyError: if var.name == 'lat': minimum = grid_info['lat_min']['default'] maximum = grid_info['lat_max']['default'] @@ -126,6 +146,7 @@ def check_latlon_variable(file): lat_first = file.dataset.variables.get('lat')[0] lat_last = file.dataset.variables.get('lat')[-1] if lat_first < lat_last: - file.warn('Latitudes in wrong order. Index should range from north to south. (found %s to %s)', lat_first, lat_last) + file.warn('Latitudes in wrong order. Index should range from north to south. (found %s to %s)', + lat_first, lat_last) else: file.info('Latitude index order looks good (N to S).') diff --git a/isimip_qc/checks/variables/time.py b/isimip_qc/checks/variables/time.py index c8fb8a1..249eb32 100644 --- a/isimip_qc/checks/variables/time.py +++ b/isimip_qc/checks/variables/time.py @@ -1,6 +1,4 @@ -import calendar -import netCDF4 from isimip_qc.config import settings from isimip_qc.fixes import fix_set_variable_attr @@ -39,10 +37,13 @@ def check_time_variable(file): standard_name = time_definition.get('standard_name') try: if time.standard_name != standard_name: - file.warn('"standard_name" attribute of "time" is "%s". Should be "%s".', time.standard_name, standard_name, fix={ - 'func': fix_set_variable_attr, - 'args': (file, 'time', 'standard_name', standard_name) - }) + file.warn( + '"standard_name" attribute of "time" is "%s". Should be "%s".', + time.standard_name, standard_name, fix={ + 'func': fix_set_variable_attr, + 'args': (file, 'time', 'standard_name', standard_name) + } + ) except AttributeError: file.warn('"standard_name" attribute of "time" is missing. Should be "%s".', standard_name, fix={ 'func': fix_set_variable_attr, @@ -118,7 +119,8 @@ def check_time_span_periods(file): definition_startyear = settings.DEFINITIONS['time_span'].get('start_fut')['value'] definition_endyear = settings.DEFINITIONS['time_span'].get('end_fut')['value'] else: - file.warn('Skipping check for simulation period as the period itself could not be determined from the file path (pre-industrial, historical or future).') + file.warn('Skipping check for simulation period as the period itself could not be' + ' determined from the file path (pre-industrial, historical or future).') return file_startyear = file.specifiers.get('start_year') @@ -128,7 +130,8 @@ def check_time_span_periods(file): if time_resolution not in ['daily', 'fixed']: if definition_startyear != file_startyear or definition_endyear != file_endyear: - file.warn('time period covered by file (%s-%s) does not match input data time span (%s-%s). Ensure to prepare the full period for all variables using the latest input data set.', + file.warn('time period covered by file (%s-%s) does not match input data time span (%s-%s).' + ' Ensure to prepare the full period for all variables using the latest input data set.', file_startyear, file_endyear, definition_startyear, definition_endyear) else: file.info('File is covering the full simulation period (by file name)') @@ -140,6 +143,8 @@ def check_time_span_periods(file): last_file_startyear = 2011 if file_startyear == last_file_startyear: - if definition_endyear != file_endyear: - file.warn('Last year of time period covered by file (%s) does not match end of input data time span (%s). Ensure to prepare the full period for all variables using the latest input data set.', + if definition_endyear != file_endyear: + file.warn('Last year of time period covered by file (%s) does not match end of input' + ' data time span (%s). Ensure to prepare the full period for all variables' + ' using the latest input data set.', file_endyear, definition_endyear) diff --git a/isimip_qc/checks/variables/time_resolution.py b/isimip_qc/checks/variables/time_resolution.py index 0d91f43..09d734c 100644 --- a/isimip_qc/checks/variables/time_resolution.py +++ b/isimip_qc/checks/variables/time_resolution.py @@ -1,6 +1,7 @@ import calendar import netCDF4 + from isimip_qc.config import settings @@ -24,11 +25,13 @@ def check_time_resolution(file): return if time_resolution in ['monthly', 'annual'] and time_calendar == '360_day': - file.error('360_day calendar is not allowed for monthly and annual data anymore. Use one of ["standard", "proleptic_gregorian", "365_day", "366_day"]') + file.error('360_day calendar is not allowed for monthly and annual data anymore.' + ' Use one of ["standard", "proleptic_gregorian", "365_day", "366_day"]') return if 'days' not in time_units: - file.error('time index has to be in daily increments for all calendars used (time.unit to be like "days since ...".') + file.error('time index has to be in daily increments for all calendars used' + ' (time.unit to be like "days since ...".') return if file.dataset.data_model in ['NETCDF4', 'NETCDF4_CLASSIC']: @@ -54,9 +57,12 @@ def check_time_resolution(file): nyears_file = endyear_file - startyear_file + 1 if startyear_nc != startyear_file or endyear_nc != endyear_file: - file.error('Start and/or end year of NetCDF time axis (%s-%s) doesn\'t match period defined in file name (%s-%s)', startyear_nc, endyear_nc, startyear_file, endyear_file) + file.error('Start and/or end year of NetCDF time axis (%s-%s) doesn\'t' + ' match period defined in file name (%s-%s)', + startyear_nc, endyear_nc, startyear_file, endyear_file) else: - file.info('Time period covered by this file matches the internal time axis (%s-%s)', startyear_nc, endyear_nc) + file.info('Time period covered by this file matches the internal time axis (%s-%s)', + startyear_nc, endyear_nc) if time_resolution == 'daily': if time_calendar in ['proleptic_gregorian', 'standard']: @@ -74,19 +80,25 @@ def check_time_resolution(file): time_days = nyears_file * 360 if time_days != time_steps: - file.error('Number of internal time steps (%s) does not match the expected number from the file name specifiers (%s). ("%s" calendar found)', time_steps, time_days, time_calendar) + file.error('Number of internal time steps (%s) does not match the expected' + ' number from the file name specifiers (%s). ("%s" calendar found)', + time_steps, time_days, time_calendar) else: - file.info('Correct number of time steps (%s) given the defined calendar (%s)', time_steps, time_calendar) + file.info('Correct number of time steps (%s) given the defined calendar (%s)', + time_steps, time_calendar) elif time_resolution == 'monthly': time_months = nyears_file * 12 if time_months != time_steps: - file.error('Number of internal time steps (%s) does not match the expected number from the file name specifiers (%s).', time_steps, time_months) + file.error('Number of internal time steps (%s) does not match the expected' + ' number from the file name specifiers (%s).', time_steps, time_months) else: file.info('Correct number of time steps (%s).', time_steps) elif time_resolution == 'annual': if nyears_file != time_steps: - file.error('Number of internal time steps (%s) does not match the expected number from the file name specifiers (%s).', time_steps, nyears_file) + file.error('Number of internal time steps (%s) does not match the expected' + ' number from the file name specifiers (%s).', time_steps, nyears_file) else: file.info('Correct number of time steps (%s).', time_steps) else: - file.warn('Could not check for the correct number of time steps because of wrong data model (%s). Has to be NETCDF4_CLASSIC.', file.dataset.data_model) + file.warn('Could not check for the correct number of time steps because of wrong' + ' data model (%s). Has to be NETCDF4_CLASSIC.', file.dataset.data_model) diff --git a/isimip_qc/checks/variables/var.py b/isimip_qc/checks/variables/var.py index a6ffdfb..76ffb4d 100644 --- a/isimip_qc/checks/variables/var.py +++ b/isimip_qc/checks/variables/var.py @@ -2,6 +2,7 @@ import netCDF4 import numpy as np + from isimip_qc.config import settings from isimip_qc.fixes import fix_set_variable_attr @@ -20,7 +21,8 @@ def check_variable(file): else: # check file name and NetCDF variable to match each other if variable.name != file.variable_name: - file.error('File name variable (%s) does not match internal variable name (%s).', file.variable_name, variable.name) + file.error('File name variable (%s) does not match internal variable name (%s).', + file.variable_name, variable.name) # check dtype if variable.dtype != 'float32': @@ -43,7 +45,7 @@ def check_variable(file): try: lat_size = grid_info['lat_size'][sens_scenario] lon_size = grid_info['lon_size'][sens_scenario] - except: + except KeyError: lat_size = grid_info['lat_size']['default'] lon_size = grid_info['lon_size']['default'] @@ -54,13 +56,22 @@ def check_variable(file): if file.is_2d: if chunking[0] != 1 or chunking[1] != lat_size or chunking[2] != lon_size: - file.warn('%s.chunking=%s should be [1, %s, %s] (with proper depencency order).', file.variable_name, chunking, lat_size, lon_size, fix_datamodel=True) + file.warn('%s.chunking=%s should be [1, %s, %s] (with proper depencency order).', + file.variable_name, chunking, lat_size, lon_size, fix_datamodel=True) else: file.info('Variable properly chunked [1, %s, %s].', lat_size, lon_size) if file.is_3d: var3d_size = file.dataset.dimensions.get(file.dim_vertical).size - if chunking[0] != 1 or ( chunking[1] != 1 and chunking[1] != var3d_size ) or chunking[2] != lat_size or chunking[3] != lon_size: - file.warn('%s.chunking=%s. Should be [1, %s, %s, %s] or [1, 1, %s, %s] (with proper depencency order).', file.variable_name, chunking, var3d_size, lat_size, lon_size, lat_size, lon_size, fix_datamodel=True) + if ( + chunking[0] != 1 + or (chunking[1] != 1 and chunking[1] != var3d_size) + or chunking[2] != lat_size + or chunking[3] != lon_size + ): + file.warn('%s.chunking=%s. Should be [1, %s, %s, %s] or [1, 1, %s, %s]' + ' (with proper depencency order).', + file.variable_name, chunking, var3d_size, lat_size, lon_size, + lat_size, lon_size, fix_datamodel=True) else: file.info('Variable properly chunked [1, %s, %s, %s].', var3d_size, lat_size, lon_size) else: @@ -78,40 +89,54 @@ def check_variable(file): if definition_dimensions: if variable.dimensions not in [definition_dimensions, default_dimensions]: - file.error('Found %s dimensions for "%s". Must be %s.', variable.dimensions, file.variable_name, default_dimensions) + file.error('Found %s dimensions for "%s". Must be %s.', + variable.dimensions, file.variable_name, default_dimensions) else: if variable.dimensions != default_dimensions: - file.error('Found %s dimensions for "%s". Must be %s.', variable.dimensions, file.variable_name, default_dimensions) + file.error('Found %s dimensions for "%s". Must be %s.', + variable.dimensions, file.variable_name, default_dimensions) # check standard_name standard_name = definition.get('standard_name') if standard_name: try: if variable.standard_name != standard_name: - file.warn('Attribute standard_name="%s" for variable "%s". Should be "%s".', variable.standard_name, file.variable_name, standard_name, fix={ + file.warn( + 'Attribute standard_name="%s" for variable "%s". Should be "%s".', + variable.standard_name, file.variable_name, standard_name, fix={ + 'func': fix_set_variable_attr, + 'args': (file, file.variable_name, 'standard_name', standard_name) + } + ) + except AttributeError: + file.warn( + 'Attribute standard_name is missing for variable "%s". Should be "%s".', + file.variable_name, standard_name, fix={ 'func': fix_set_variable_attr, 'args': (file, file.variable_name, 'standard_name', standard_name) - }) - except AttributeError: - file.warn('Attribute standard_name is missing for variable "%s". Should be "%s".', file.variable_name, standard_name, fix={ - 'func': fix_set_variable_attr, - 'args': (file, file.variable_name, 'standard_name', standard_name) - }) + } + ) # check long_name long_name = definition.get('long_name') if long_name: try: if variable.long_name != long_name: - file.warn('Attribute long_name="%s" for variable "%s". Should be "%s".', variable.long_name, file.variable_name, long_name, fix={ + file.warn( + 'Attribute long_name="%s" for variable "%s". Should be "%s".', + variable.long_name, file.variable_name, long_name, fix={ + 'func': fix_set_variable_attr, + 'args': (file, file.variable_name, 'long_name', long_name) + } + ) + except AttributeError: + file.warn( + 'Attribute long_name is missing for variable "%s". Should be "%s".', + file.variable_name, long_name, fix={ 'func': fix_set_variable_attr, 'args': (file, file.variable_name, 'long_name', long_name) - }) - except AttributeError: - file.warn('Attribute long_name is missing for variable "%s". Should be "%s".', file.variable_name, long_name, fix={ - 'func': fix_set_variable_attr, - 'args': (file, file.variable_name, 'long_name', long_name) - }) + } + ) # check variable units units = definition.get('units') @@ -123,10 +148,13 @@ def check_variable(file): else: file.info('Variable unit matches protocol definition (%s).', variable.units) except AttributeError: - file.warn('Variable "%s" units attribute is missing. Should be "%s".', file.variable_name, units, fix={ - 'func': fix_set_variable_attr, - 'args': (file, file.variable_name, 'units', units) - }) + file.warn( + 'Variable "%s" units attribute is missing. Should be "%s".', + file.variable_name, units, fix={ + 'func': fix_set_variable_attr, + 'args': (file, file.variable_name, 'units', units) + } + ) else: file.warn('No units information for variable "%s" in definition.', file.variable_name) @@ -159,7 +187,8 @@ def check_variable(file): valid_min = definition.get('valid_min') valid_max = definition.get('valid_max') if (valid_min is not None) and (valid_max is not None): - file.info("Checking values for valid minimum and maximum range defined in the protocol. This could take some time...") + file.info('Checking values for valid minimum and maximum range defined in' + ' the protocol. This could take some time...') lat = file.dataset.variables.get('lat') lon = file.dataset.variables.get('lon') time = file.dataset.variables.get('time') @@ -168,7 +197,7 @@ def check_variable(file): too_high = np.argwhere(variable[:] > valid_max) time = file.dataset.variables.get('time') - time_resolution = file.specifiers.get('time_step') + file.specifiers.get('time_step') try: time_units = time.units @@ -183,7 +212,8 @@ def check_variable(file): return if too_low.size: - file.warn('%i values are lower than the valid minimum (%.2E %s).', too_low.shape[0], valid_min, units) + file.warn('%i values are lower than the valid minimum (%.2E %s).', + too_low.shape[0], valid_min, units) if settings.LOG_LEVEL == 'VRDETAIL': file.warn('%i lowest values are :', min(settings.MINMAX, too_low.shape[0])) @@ -207,7 +237,8 @@ def check_variable(file): too_low_sorted[i][1], units) if too_high.size: - file.warn('%i values are higher than the valid maximum (%.2E %s).', too_high.shape[0], valid_max, units) + file.warn('%i values are higher than the valid maximum (%.2E %s).', + too_high.shape[0], valid_max, units) if settings.LOG_LEVEL == 'VRDETAIL': file.warn('%i highest values are :', min(settings.MINMAX, too_high.shape[0])) @@ -234,4 +265,5 @@ def check_variable(file): file.info('Values are within valid range (%.2E to %.2E).', valid_min, valid_max) else: - file.warn('No min and/or max definition found for variable "%s" in protocol. Skipping test.', file.variable_name) + file.warn('No min and/or max definition found for variable "%s" in protocol. Skipping test.', + file.variable_name) diff --git a/isimip_qc/checks/variables/var3d.py b/isimip_qc/checks/variables/var3d.py index a30a6d9..f4672d8 100644 --- a/isimip_qc/checks/variables/var3d.py +++ b/isimip_qc/checks/variables/var3d.py @@ -1,4 +1,3 @@ -import numpy as np from isimip_qc.config import settings from isimip_qc.fixes import fix_set_variable_attr @@ -26,7 +25,8 @@ def check_attribute(var3d, attr_type, attribute): # check if vertical dimension ha a variable associated if var3d is None and file.dim_vertical: - file.error('Found dimension (%s) but no variable linked to it. Introduce "%s" as variable also.', file.dim_vertical, file.dim_vertical) + file.error('Found dimension (%s) but no variable linked to it. Introduce "%s" as variable also.', + file.dim_vertical, file.dim_vertical) # check for definition in protocol for 3d variable if not var3d_definition: @@ -52,7 +52,8 @@ def check_attribute(var3d, attr_type, attribute): depth_last = file.dataset.variables.get(file.dim_vertical)[-1] if depth_first > depth_last: - file.warn('Depths in wrong order. Should increase with depth . (found %s to %s)', depth_first, depth_last) + file.warn('Depths in wrong order. Should increase with depth . (found %s to %s)', + depth_first, depth_last) else: file.info('Depths order looks good (positive down).') @@ -63,7 +64,8 @@ def check_attribute(var3d, attr_type, attribute): levlak_last = file.dataset.variables.get(file.dim_vertical)[-1] if levlak_first > levlak_last: - file.warn('"levlak" in wrong order. Should increase with depth . (found %s to %s)', levlak_first, levlak_last) + file.warn('"levlak" in wrong order. Should increase with depth . (found %s to %s)', + levlak_first, levlak_last) else: file.info('"levlak" order looks good (positive down).') @@ -72,24 +74,29 @@ def check_attribute(var3d, attr_type, attribute): depth_file = file.dataset.variables.get('depth') if depth_file is None: - file.warn('Variable "depth" not found. Introduce layer vertical center depths in [m] as depth(levlak,lat,lon) or depth(time,levlak,lat,lon)') + file.warn('Variable "depth" not found. Introduce layer vertical center' + ' depths in [m] as depth(levlak,lat,lon) or depth(time,levlak,lat,lon)') else: if len(depth_file.dimensions) == 4: if depth_file.dimensions[1] != 'levlak': - file.error('Time varying "depth" variable has no dependency for "levlak" level index. Expecting: depth(time,levlak,lat,lon)') + file.error('Time varying "depth" variable has no dependency for "levlak"' + ' level index. Expecting: depth(time,levlak,lat,lon)') elif len(depth_file.dimensions) == 3: if depth_file.dimensions[0] != 'levlak': - file.error('Fixed-time "depth" variable has no dependency for "levlak" level index. Expecting: depth(levlak,lat,lon)') + file.error('Fixed-time "depth" variable has no dependency for "levlak"' + ' level index. Expecting: depth(levlak,lat,lon)') elif len(depth_file.dimensions) == 1: if depth_file.dimensions[0] != 'levlak': - file.error('Globally fixed "depth" variable has no dependency for "levlak" level index. Expecting: depth(levlak)') + file.error('Globally fixed "depth" variable has no dependency for "levlak"' + ' level index. Expecting: depth(levlak)') else: file.error('No proper "levlak" dependency found in "depth" variable.') depth_definition = settings.DEFINITIONS['dimensions'].get('depth') if depth_definition is None: - file.warn('Dimension "depth" is not yet defined in protocol. Skipping attribute checks for "depth".') + file.warn('Dimension "depth" is not yet defined in protocol. Skipping' + ' attribute checks for "depth".') else: for attribute in ['axis', 'standard_name', 'long_name', 'units']: attr_definition = depth_definition.get(attribute) diff --git a/isimip_qc/config.py b/isimip_qc/config.py index 0df92f8..b5b2999 100644 --- a/isimip_qc/config.py +++ b/isimip_qc/config.py @@ -1,3 +1,4 @@ +from datetime import datetime from pathlib import Path import colorlog @@ -6,8 +7,6 @@ from isimip_utils.decorators import cached_property from isimip_utils.fetch import fetch_definitions, fetch_pattern, fetch_schema -from datetime import datetime - logger = colorlog.getLogger(__name__) diff --git a/isimip_qc/main.py b/isimip_qc/main.py index e944fe2..d0b4aeb 100644 --- a/isimip_qc/main.py +++ b/isimip_qc/main.py @@ -1,5 +1,5 @@ -from os import path import sys +from os import path import colorlog @@ -37,7 +37,8 @@ def get_parser(): default='https://protocol.isimip.org https://protocol2.isimip.org', help='URL or file path to the protocol when different from official repository') parser.add_argument('--log-level', dest='log_level', default='CHECKING', - help='log level (CRITICAL, ERROR, WARN, VRDETAIL, CHECKING, SUMMARY, INFO, or DEBUG) [default: CHECKING]') + help='log level (CRITICAL, ERROR, WARN, VRDETAIL, CHECKING, SUMMARY,' + ' INFO, or DEBUG) [default: CHECKING]') parser.add_argument('--log-path', dest='log_path', help='base path for the individual log files') parser.add_argument('--log-path-level', dest='log_path_level', default='WARN', @@ -59,7 +60,8 @@ def get_parser(): parser.add_argument('--fix', dest='fix', action='store_true', default=False, help='try to fix warnings detected on the original files') parser.add_argument('--fix-datamodel', dest='fix_datamodel', action='store', nargs='?', const='nccopy', type=str, - help='also fix warnings on data model found using NCCOPY or CDO (slow). Choose preferred tool per lower case argument.') + help='also fix warnings on data model found using NCCOPY or CDO (slow).' + ' Choose preferred tool per lower case argument.') parser.add_argument('--check', dest='check', help='perform only one particular check') parser.add_argument('--force-copy-move', dest='force_copy_move', action='store_true', default=False, @@ -108,12 +110,12 @@ def main(): logger.log(CHECKING, file_path) if settings.INCLUDE_LIST: - if not any([string in str(file_path) for string in settings.INCLUDE_LIST.split(',')]): + if not any(string in str(file_path) for string in settings.INCLUDE_LIST.split(',')): logger.log(CHECKING, ' skipped by include option.') continue if settings.EXCLUDE_LIST: - if any([string in str(file_path) for string in settings.EXCLUDE_LIST.split(',')]): + if any(string in str(file_path) for string in settings.EXCLUDE_LIST.split(',')): logger.log(CHECKING, ' skipped by exclude option.') continue diff --git a/isimip_qc/models.py b/isimip_qc/models.py index 7d7a2c6..e987382 100644 --- a/isimip_qc/models.py +++ b/isimip_qc/models.py @@ -1,17 +1,21 @@ import logging import shutil -from pathlib import Path from collections import Counter +from pathlib import Path import colorlog import jsonschema from prettytable.colortable import PrettyTable -from isimip_utils.netcdf import (get_dimensions, get_global_attributes, - get_variables, open_dataset_read, - open_dataset_write) -from isimip_utils.patterns import match_file from isimip_utils.exceptions import DidNotMatch +from isimip_utils.netcdf import ( + get_dimensions, + get_global_attributes, + get_variables, + open_dataset_read, + open_dataset_write, +) +from isimip_utils.patterns import match_file from .config import settings from .utils.datamodel import call_cdo, call_nccopy @@ -20,7 +24,7 @@ from .utils.logging import SUMMARY -class File(object): +class File: def __init__(self, file_path): file_path = Path(file_path).expanduser() @@ -85,12 +89,12 @@ def warn(self, message, *args, fix=None, fix_datamodel=None): def error(self, message, *args): if self.logger is not None: self.logger.error(message, *args) - self.errors.append((message % args)) + self.errors.append(message % args) def critical(self, message, *args): if self.logger is not None: self.logger.critical(message, *args) - self.criticals.append((message % args)) + self.criticals.append(message % args) def fix_infos(self): for info in self.infos[:]: @@ -108,13 +112,14 @@ def fix_warnings(self): def fix_datamodel(self): # check if we need to fix using cdu - if any([fix_datamodel for _, _, fix_datamodel in self.warnings]): + if any(fix_datamodel for _, _, fix_datamodel in self.warnings): # fix using tmpfile tmp_abs_path = self.abs_path.parent / ('.' + self.abs_path.name + '-fix') if settings.FIX_DATAMODEL == 'cdo': if shutil.which('cdo'): self.info('Rewriting file with fixed data model using "cdo"') - call_cdo(['--history', '-s', '-z', 'zip_5', '-f', 'nc4c', '-b', 'F32', '-k', 'grid', '-copy'], self.abs_path, tmp_abs_path) + call_cdo(['--history', '-s', '-z', 'zip_5', '-f', 'nc4c', '-b', 'F32', '-k', 'grid', '-copy'], + self.abs_path, tmp_abs_path) else: self.error('"cdo" is not available for execution. Please install before.') elif settings.FIX_DATAMODEL == 'nccopy': @@ -124,7 +129,8 @@ def fix_datamodel(self): else: self.error('"nccopy" is not available for execution. Please install before.') else: - self.error('"' + settings.FIX_DATAMODEL + '" is not a valid argument for --fix-datamodel option. Chose "nccopy" or "cdo"') + self.error(f'"{settings.FIX_DATAMODEL}" is not a valid argument for --fix-datamodel option.' + ' Chose "nccopy" or "cdo"') if settings.FIX_DATAMODEL in ['nccopy', 'cdo']: # move tmp file to original file @@ -222,7 +228,7 @@ def move(self): move_file(self.abs_path, settings.CHECKED_PATH / self.path) -class Summary(object): +class Summary: def __init__(self): self.specifiers = {} diff --git a/isimip_qc/utils/datamodel.py b/isimip_qc/utils/datamodel.py index 5ca7636..68a41bd 100644 --- a/isimip_qc/utils/datamodel.py +++ b/isimip_qc/utils/datamodel.py @@ -2,12 +2,12 @@ def call_cdo(args, input_file, output_file): - args = ['cdo'] + args + [str(input_file), str(output_file)] + args = ['cdo', *args, str(input_file), str(output_file)] # print(' '.join(args)) subprocess.check_call(args) def call_nccopy(args, input_file, output_file): - args = ['nccopy'] + args + [str(input_file), str(output_file)] + args = ['nccopy', *args, str(input_file), str(output_file)] # print(' '.join(args)) subprocess.check_call(args) diff --git a/isimip_qc/utils/files.py b/isimip_qc/utils/files.py index 79d8cc3..ff2bfa3 100644 --- a/isimip_qc/utils/files.py +++ b/isimip_qc/utils/files.py @@ -1,10 +1,11 @@ import os import shutil from pathlib import Path -from ..config import settings import colorlog +from ..config import settings + logger = colorlog.getLogger(__name__) @@ -25,7 +26,8 @@ def move_file(source_path, target_path, overwrite=False): logger.info('Copy file') shutil.move(source_path, target_path) else: - logger.warn('Skip moving because target file is present and overwriting not allowed. Use -O to allow overwriting.') + logger.warn('Skip moving because target file is present and overwriting not allowed.' + ' Use -O to allow overwriting.') def copy_file(source_path, target_path): @@ -35,4 +37,5 @@ def copy_file(source_path, target_path): logger.info('Copy file') shutil.copy(source_path, target_path) else: - logger.warn('Skip copying because target file is present and overwriting not allowed. Use -O to allow overwriting.') + logger.warn('Skip copying because target file is present and overwriting not allowed.' + ' Use -O to allow overwriting.') diff --git a/pyproject.toml b/pyproject.toml index a80558e..d62345d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,14 @@ dependencies = [ ] dynamic = ["version"] +[project.optional-dependencies] +dev = [ + "build", + "pre-commit", + "ruff", + "twine", +] + [project.urls] Repository = "https://github.com/ISI-MIP/isimip-qc" @@ -38,3 +46,41 @@ isimip-qc = "isimip_qc.main:main" [tool.setuptools.dynamic] version = { attr = "isimip_qc.__version__" } + +[tool.ruff] +target-version = "py38" +line-length = 120 +select = [ + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "PGH", # pygrep-hooks + "RUF", # ruff + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] +ignore = [ + "B006", # mutable-argument-default + "B007", # unused-loop-control-variable + "B018", # useless-expression + "RUF012", # mutable-class-default +] + +[tool.ruff.isort] +known-first-party = [ + "isimip_qc" +] +section-order = [ + "future", + "standard-library", + "third-party", + "isimip_utils", + "first-party", + "local-folder" +] + +[tool.ruff.isort.sections] +isimip_utils = ["isimip_utils"] From 4bc3cf65e64164949694d69d8d7af085853f48e2 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 20 Sep 2023 11:29:04 +0200 Subject: [PATCH 2/3] Fix formatting --- isimip_qc/checks/3d.py | 11 ++++++++--- isimip_qc/checks/experiments.py | 3 ++- isimip_qc/checks/variables/var.py | 23 +++++++++++++++-------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/isimip_qc/checks/3d.py b/isimip_qc/checks/3d.py index e8589d2..104f682 100644 --- a/isimip_qc/checks/3d.py +++ b/isimip_qc/checks/3d.py @@ -1,6 +1,8 @@ from isimip_qc.config import settings + from ..exceptions import FileCritical + def check_3d(file): crop = file.specifiers.get('crop') irrigation = file.specifiers.get('irrigation') @@ -37,7 +39,8 @@ def check_3d(file): definition = settings.DEFINITIONS.get('variable', {}).get(file.specifiers.get('variable')) if definition is None: - raise FileCritical(file,'Variable %s not defined for sector %s. skipping...', file.variable_name, settings.SECTOR) + raise FileCritical(file, 'Variable %s not defined for sector %s. skipping...', + file.variable_name, settings.SECTOR) # check for number of variable dependencies @@ -49,8 +52,10 @@ def check_3d(file): settings_dim_len = 3 if file_dim_len != settings_dim_len: - file.critical('Dimension missing (%s found, %s expected). Declare as %s%s. ', - file_dim_len, settings_dim_len, file.variable_name, str(definition['dimensions']).replace('\'','')) + file.critical( + 'Dimension missing (%s found, %s expected). Declare as %s%s. ', + file_dim_len, settings_dim_len, file.variable_name, str(definition['dimensions']).replace('\'', '') + ) # detect 2d or 3d data file.is_time_fixed = False diff --git a/isimip_qc/checks/experiments.py b/isimip_qc/checks/experiments.py index 4dda86f..d63f1a8 100644 --- a/isimip_qc/checks/experiments.py +++ b/isimip_qc/checks/experiments.py @@ -1,5 +1,6 @@ -from ..utils.experiments import get_experiment from ..exceptions import FileCritical +from ..utils.experiments import get_experiment + def check_experiment(file): if file.specifiers.get("time_step") == "fixed": diff --git a/isimip_qc/checks/variables/var.py b/isimip_qc/checks/variables/var.py index 76ffb4d..1881f31 100644 --- a/isimip_qc/checks/variables/var.py +++ b/isimip_qc/checks/variables/var.py @@ -26,7 +26,8 @@ def check_variable(file): # check dtype if variable.dtype != 'float32': - file.warn('%s data type is "%s" should be "float32".', file.variable_name, variable.dtype, fix_datamodel=True) + file.warn('%s data type is "%s" should be "float32".', + file.variable_name, variable.dtype, fix_datamodel=True) # check chunking chunking = variable.chunking() @@ -163,20 +164,26 @@ def check_variable(file): try: attr = variable.getncattr(name) if attr.dtype != variable.dtype: - file.error('%s data type (%s) differs from %s data type (%s).', name, attr.dtype, file.variable_name, variable.dtype) + file.error('%s data type (%s) differs from %s data type (%s).', + name, attr.dtype, file.variable_name, variable.dtype) else: if not math.isclose(attr, 1e+20, rel_tol=1e-6): - file.error('Missing values for variable "%s": %s=%s but should be 1e+20.', file.variable_name, name, attr) + file.error('Missing values for variable "%s": %s=%s but should be 1e+20.', + file.variable_name, name, attr) else: file.info('Missing value attribute "%s" is properly set.', name) except AttributeError: if name == 'missing_value': - file.warn('"%s" attribute for variable "%s" is missing. Should be set to 1e+20f.', name, file.variable_name, fix={ - 'func': fix_set_variable_attr, - 'args': (file, file.variable_name, 'missing_value', 1e+20) - }) + file.warn( + '"%s" attribute for variable "%s" is missing. Should be set to 1e+20f.', + name, file.variable_name, fix={ + 'func': fix_set_variable_attr, + 'args': (file, file.variable_name, 'missing_value', 1e+20) + } + ) else: - file.error('"%s" attribute for variable "%s" is missing. Should be set to 1e+20f and must be set when variable is created.', name, file.variable_name) + file.error('"%s" attribute for variable "%s" is missing. Should be set to 1e+20f and must be set' + ' when variable is created.', name, file.variable_name) # check valid range if settings.MINMAX: From f054acbbe920f661761d920336866febc6b13523 Mon Sep 17 00:00:00 2001 From: Matthias Buechner Date: Wed, 20 Sep 2023 12:02:43 +0200 Subject: [PATCH 3/3] Release 3.3.0 --- isimip_qc/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isimip_qc/__init__.py b/isimip_qc/__init__.py index 260e525..56b30ef 100644 --- a/isimip_qc/__init__.py +++ b/isimip_qc/__init__.py @@ -1 +1 @@ -VERSION = __version__ = '3.2.5' +VERSION = __version__ = '3.3.0'