def select_latest_versions(datasets: dict) -> dict:
    """Return a dict with only the latest version of each dataset.

    Keys are expected to look like ``<name>.<version>``: everything after
    the last dot is treated as the version, everything before it as the
    dataset name.

    Parameters
    ----------
    datasets : dict
        A dict with dataset objects

    Returns
    -------
    most_recent_datasets : dict
        A dict containing only the most recent version of each dataset object,
        in case multiple versions have been passed. Entries are ordered by
        dataset name.
    """
    def version_rank(version):
        # Compare numeric versions by value so that e.g. 'v10' is newer than
        # 'v9'; a plain string comparison would order them the other way.
        number = version.lstrip('vV')
        if number.isdecimal():
            return (1, int(number), version)
        # Non-numeric (or missing) versions rank below numeric ones and are
        # compared alphabetically among themselves.
        return (0, version)

    latest = {}
    for key in datasets:
        name, dot, version = key.rpartition('.')
        if not dot:
            # No version suffix at all: keep the key as its own dataset
            # instead of failing on it.
            name, version = key, ''
        rank = version_rank(version)
        if name not in latest or rank > latest[name][0]:
            latest[name] = (rank, key)

    # Names are unique, so sorting the items only ever compares the names.
    return {key: datasets[key] for _, (_, key) in sorted(latest.items())}
@@ -139,7 +168,11 @@ def search(connection, preferred_hosts, ignore_hosts, facets): datasets[dataset_name] = {} datasets[dataset_name][host] = dataset - print("Found", len(datasets), "unique datasets") + # For some datasets, multiple versions are returned + # https://github.com/ESMValGroup/ESMValTool_sample_data/issues/5 + datasets = select_latest_versions(datasets) + + print(f"Found {len(datasets)} datasets (only the latest versions)") # Select host and find files on host files = {} diff --git a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/Amon/ta/gn/v20190306/ta_Amon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc b/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/Amon/ta/gn/v20190306/ta_Amon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc deleted file mode 100644 index dec3611..0000000 Binary files a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/Amon/ta/gn/v20190306/ta_Amon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc and /dev/null differ diff --git a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/day/ta/gn/v20190306/ta_day_CanESM5_historical_r1i1p1f1_gn_19910101-20001231.nc b/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/day/ta/gn/v20190306/ta_day_CanESM5_historical_r1i1p1f1_gn_19910101-20001231.nc deleted file mode 100644 index 8f3e969..0000000 Binary files a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/day/ta/gn/v20190306/ta_day_CanESM5_historical_r1i1p1f1_gn_19910101-20001231.nc and /dev/null differ diff --git a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/day/ta/gn/v20190306/ta_day_CanESM5_historical_r1i1p1f1_gn_20010101-20101231.nc b/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/day/ta/gn/v20190306/ta_day_CanESM5_historical_r1i1p1f1_gn_20010101-20101231.nc 
deleted file mode 100644 index 99f8d0b..0000000 Binary files a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/day/ta/gn/v20190306/ta_day_CanESM5_historical_r1i1p1f1_gn_20010101-20101231.nc and /dev/null differ diff --git a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_195001-196912.nc b/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_195001-196912.nc deleted file mode 100644 index 0eeb99e..0000000 Binary files a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_195001-196912.nc and /dev/null differ diff --git a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_197001-198912.nc b/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_197001-198912.nc deleted file mode 100644 index fe281a0..0000000 Binary files a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_197001-198912.nc and /dev/null differ diff --git a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_199001-200912.nc b/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_199001-200912.nc deleted file mode 100644 index 7311c22..0000000 Binary files 
a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_199001-200912.nc and /dev/null differ diff --git a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_201001-201412.nc b/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_201001-201412.nc deleted file mode 100644 index 262498a..0000000 Binary files a/esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/FIO-QLNM/FIO-ESM-2-0/historical/r1i1p1f1/Amon/ta/gn/v20191204/ta_Amon_FIO-ESM-2-0_historical_r1i1p1f1_gn_201001-201412.nc and /dev/null differ