diff --git a/.config/pre-commit-config.yaml b/.config/pre-commit-config.yaml index d922460..4da51ee 100644 --- a/.config/pre-commit-config.yaml +++ b/.config/pre-commit-config.yaml @@ -9,7 +9,7 @@ repos: exclude: test_scraper_.*\.json - id: check-ast - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.2 + rev: v0.4.2 hooks: # Run the linter. - id: ruff @@ -18,7 +18,7 @@ repos: - id: ruff-format args: [--config, .config/ruff.toml] - repo: https://github.com/astral-sh/uv-pre-commit - rev: v0.1.24 + rev: 0.1.38 hooks: # Run the pip compile - id: pip-compile diff --git a/documentation/main.md b/documentation/main.md index 4d709be..16ca193 100755 --- a/documentation/main.md +++ b/documentation/main.md @@ -52,6 +52,10 @@ The library has detailed API documentation which can be found in the menu at the ## Breaking Changes +From 6.2.8, fix mark_data_updated which was broken due to an error in +dataset_update_filestore_resource in which timezone information was +incorrectly added to the iso formatted string + From 6.2.7, generate_resource_from_iterator renamed to generate_resource_from_iterable with requirement of iterable rather iterator diff --git a/pyproject.toml b/pyproject.toml index 4875983..588a5db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ "defopt>=6.4.0", "email_validator", "hdx-python-country>=3.7.0", - "hdx-python-utilities>=3.6.7", + "hdx-python-utilities>=3.6.8", "libhxl>=5.2.1", "makefun", "ndg-httpsclient", diff --git a/requirements.txt b/requirements.txt index 4c09306..8938f9b 100755 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ charset-normalizer==3.3.2 ckanapi==4.8 click==8.1.7 # via typer -coverage==7.4.4 +coverage==7.5.0 # via pytest-cov cryptography==42.0.5 # via pyopenssl @@ -35,7 +35,7 @@ docopt==0.6.2 # via # ckanapi # num2words -docutils==0.21.post1 +docutils==0.21.2 # via defopt email-validator==2.1.1 et-xmlfile==1.1.0 @@ -52,19 +52,19 @@ google-auth-oauthlib==1.2.0 # via 
gspread gspread==6.1.0 hdx-python-country==3.7.0 -hdx-python-utilities==3.6.7 +hdx-python-utilities==3.6.8 # via hdx-python-country humanize==4.9.0 # via frictionless -identify==2.5.35 +identify==2.5.36 # via pre-commit -idna==3.6 +idna==3.7 # via # email-validator # requests ijson==3.2.3 # via hdx-python-utilities -inflect==7.2.0 +inflect==7.2.1 # via quantulum3 iniconfig==2.0.0 # via pytest @@ -108,9 +108,9 @@ packaging==24.0 # via pytest petl==1.7.15 # via frictionless -platformdirs==4.2.0 +platformdirs==4.2.1 # via virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via pytest ply==3.11 # via @@ -128,9 +128,9 @@ pyasn1-modules==0.4.0 # via google-auth pycparser==2.22 # via cffi -pydantic==2.6.4 +pydantic==2.7.1 # via frictionless -pydantic-core==2.16.3 +pydantic-core==2.18.2 # via pydantic pygments==2.17.2 # via rich @@ -159,7 +159,7 @@ pyyaml==6.0.1 # frictionless # pre-commit # tableschema-to-template -quantulum3==0.9.0 +quantulum3==0.9.1 ratelimit==2.2.1 # via hdx-python-utilities requests==2.31.0 @@ -183,7 +183,7 @@ ruamel-yaml==0.18.6 # via hdx-python-utilities ruamel-yaml-clib==0.2.8 # via ruamel-yaml -setuptools==69.2.0 +setuptools==69.5.1 # via # ckanapi # nodeenv @@ -234,9 +234,9 @@ urllib3==2.2.1 # via # libhxl # requests -validators==0.28.0 +validators==0.28.1 # via frictionless -virtualenv==20.25.1 +virtualenv==20.26.0 # via pre-commit wheel==0.43.0 # via libhxl diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 5cc3bfa..5013c27 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -5,7 +5,7 @@ import sys import warnings from copy import deepcopy -from datetime import datetime, timezone +from datetime import datetime from os.path import isfile, join from typing import ( TYPE_CHECKING, @@ -41,6 +41,7 @@ default_date, default_enddate, now_utc, + now_utc_notz, parse_date, parse_date_range, ) @@ -598,9 +599,9 @@ def _prepare_hdx_call(self, data: Dict, kwargs: Any) -> None: del kwargs["updated_by_script"] else: scriptinfo = 
self.configuration.get_user_agent() - # No need to output timezone info here + # Should not output timezone info here data["updated_by_script"] = ( - f"{scriptinfo} ({datetime.now(timezone.utc).replace(tzinfo=None).isoformat(timespec='microseconds')})" + f"{scriptinfo} ({now_utc_notz().isoformat(timespec='microseconds')})" ) batch = kwargs.get("batch") if batch: diff --git a/src/hdx/data/filestore_helper.py b/src/hdx/data/filestore_helper.py index dbbee9f..66cee01 100755 --- a/src/hdx/data/filestore_helper.py +++ b/src/hdx/data/filestore_helper.py @@ -1,8 +1,9 @@ """Helper to the Dataset class for handling resources with filestores.""" -from datetime import datetime, timezone from typing import TYPE_CHECKING, Any, Dict +from hdx.utilities.dateparse import now_utc_notz + if TYPE_CHECKING: from hdx.data.resource import Resource @@ -90,7 +91,8 @@ def dataset_update_filestore_resource( data_updated = resource_data_to_update.is_marked_data_updated() if data_updated: - resource_data_to_update["last_modified"] = datetime.now( - timezone.utc - ).isoformat(timespec="microseconds") + # Should not output timezone info here + resource_data_to_update["last_modified"] = ( + now_utc_notz().isoformat(timespec="microseconds") + ) resource_data_to_update.data_updated = False diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index e7e6dff..8b94e5c 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -2,7 +2,7 @@ import logging import warnings -from datetime import datetime, timezone +from datetime import datetime from os.path import join from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union @@ -13,7 +13,7 @@ from hdx.data.date_helper import DateHelper from hdx.data.hdxobject import HDXError, HDXObject from hdx.data.resource_view import ResourceView -from hdx.utilities.dateparse import now_utc, parse_date +from hdx.utilities.dateparse import now_utc, now_utc_notz, parse_date from hdx.utilities.downloader import Download 
from hdx.utilities.typehint import ListTuple from hdx.utilities.uuid import is_valid_uuid @@ -393,9 +393,10 @@ def _resource_merge_hdx_update( """ data_updated = kwargs.pop("data_updated", self.data_updated) if data_updated and not self.file_to_upload: - self.old_data["last_modified"] = datetime.now( - timezone.utc - ).isoformat(timespec="microseconds") + # Should not output timezone info here + self.old_data["last_modified"] = now_utc_notz().isoformat( + timespec="microseconds" + ) self.data_updated = False # old_data will be merged into data in the next step self._merge_hdx_update( diff --git a/tests/hdx/data/__init__.py b/tests/hdx/data/__init__.py index 5735909..7ab16e4 100755 --- a/tests/hdx/data/__init__.py +++ b/tests/hdx/data/__init__.py @@ -96,6 +96,16 @@ def json(self): }, ] +resource_data = { + "name": "MyResource1", + "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", + "format": "xlsx", + "url": "http://test/spreadsheet.xlsx", + "description": "My Resource", + "api_type": "api", + "resource_type": "api", +} + organization_data = { "name": "MyOrganization1", "title": "Humanitarian Organization", diff --git a/tests/hdx/data/test_filestore_helper.py b/tests/hdx/data/test_filestore_helper.py new file mode 100644 index 0000000..735bb86 --- /dev/null +++ b/tests/hdx/data/test_filestore_helper.py @@ -0,0 +1,49 @@ +import copy +import re + +from . 
import resource_data +from hdx.data.filestore_helper import FilestoreHelper +from hdx.data.resource import Resource + + +class TestFilestoreHelper: + def test_dataset_update_filestore_resource(self, configuration): + resource_data_copy = copy.deepcopy(resource_data) + resource = Resource(resource_data_copy) + filestore_resources = {} + FilestoreHelper.dataset_update_filestore_resource( + resource, filestore_resources, 0 + ) + assert resource == { + "api_type": "api", + "description": "My Resource", + "format": "xlsx", + "name": "MyResource1", + "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", + "resource_type": "api", + "url": "http://test/spreadsheet.xlsx", + } + assert filestore_resources == {} + + resource.set_file_to_upload("test") + FilestoreHelper.dataset_update_filestore_resource( + resource, filestore_resources, 0 + ) + assert resource == { + "api_type": "api", + "description": "My Resource", + "format": "xlsx", + "name": "MyResource1", + "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", + "resource_type": "api", + "url": "updated_by_file_upload_step", + } + assert filestore_resources == {0: "test"} + + resource.mark_data_updated() + FilestoreHelper.dataset_update_filestore_resource( + resource, filestore_resources, 0 + ) + regex = r"^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d\d\d\d$" + assert re.match(regex, resource["last_modified"]) + assert filestore_resources == {0: "test"} diff --git a/tests/hdx/data/test_resource.py b/tests/hdx/data/test_resource.py index a68d09f..14cec23 100755 --- a/tests/hdx/data/test_resource.py +++ b/tests/hdx/data/test_resource.py @@ -9,7 +9,7 @@ import pytest -from . import MockResponse, dataset_resultdict +from . 
import MockResponse, dataset_resultdict, resource_data from .test_resource_view import resource_view_list, resource_view_mocklist from hdx.api.configuration import Configuration from hdx.data.hdxobject import HDXError @@ -318,16 +318,6 @@ def mockresourceview(url, decodedata): class TestResource: - resource_data = { - "name": "MyResource1", - "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", - "format": "xlsx", - "url": "http://test/spreadsheet.xlsx", - "description": "My Resource", - "api_type": "api", - "resource_type": "api", - } - datastore = None @pytest.fixture(scope="class") @@ -685,15 +675,15 @@ def test_read_from_hdx(self, configuration, read): Resource.read_from_hdx("ABC") def test_check_url_filetoupload(self, configuration): - resource_data = copy.deepcopy(TestResource.resource_data) - resource = Resource(resource_data) + resource_data_copy = copy.deepcopy(resource_data) + resource = Resource(resource_data_copy) resource.check_url_filetoupload() resource.set_file_to_upload("abc") resource.check_url_filetoupload() resource["url"] = "lala" with pytest.raises(HDXError): resource.check_url_filetoupload() - resource = Resource(resource_data) + resource = Resource(resource_data_copy) resource["format"] = "NOTEXIST" with pytest.raises(HDXError): resource.check_url_filetoupload() @@ -726,8 +716,8 @@ def test_get_set_date_of_resource(self): } def test_check_required_fields(self, configuration): - resource_data = copy.deepcopy(TestResource.resource_data) - resource = Resource(resource_data) + resource_data_copy = copy.deepcopy(resource_data) + resource = Resource(resource_data_copy) resource.check_url_filetoupload() resource.check_required_fields() @@ -740,8 +730,8 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create): with pytest.raises(HDXError): resource.create_in_hdx() - resource_data = copy.deepcopy(TestResource.resource_data) - resource = Resource(resource_data) + resource_data_copy = copy.deepcopy(resource_data) + resource = 
Resource(resource_data_copy) resource.create_in_hdx() assert resource["id"] == "de6549d8-268b-4dfe-adaf-a4ae5c8510d5" assert resource["url_type"] == "api" @@ -751,8 +741,8 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create): == "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/main/tests/fixtures/test_data.csv" ) - resource_data = copy.deepcopy(TestResource.resource_data) - resource = Resource(resource_data) + resource_data_copy = copy.deepcopy(resource_data) + resource = Resource(resource_data_copy) filetoupload = join("tests", "fixtures", "test_data.csv") resource.set_file_to_upload(filetoupload) assert resource.get_file_to_upload() == filetoupload @@ -770,13 +760,13 @@ def test_create_in_hdx(self, configuration, date_pattern, post_create): resource["url"] == "http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv" ) - resource_data["name"] = "MyResource2" - resource = Resource(resource_data) + resource_data_copy["name"] = "MyResource2" + resource = Resource(resource_data_copy) with pytest.raises(HDXError): resource.create_in_hdx() - resource_data["name"] = "MyResource3" - resource = Resource(resource_data) + resource_data_copy["name"] = "MyResource3" + resource = Resource(resource_data_copy) with pytest.raises(HDXError): resource.create_in_hdx() @@ -841,10 +831,10 @@ def test_update_in_hdx(self, configuration, date_pattern, post_update): with pytest.raises(HDXError): resource.update_in_hdx() - resource_data = copy.deepcopy(TestResource.resource_data) - resource_data["name"] = "MyResource1" - resource_data["id"] = "74b74ae1-df0c-4716-829f-4f939a046811" - resource = Resource(resource_data) + resource_data_copy = copy.deepcopy(resource_data) + resource_data_copy["name"] = "MyResource1" + resource_data_copy["id"] = "74b74ae1-df0c-4716-829f-4f939a046811" + resource = Resource(resource_data_copy) resource.mark_data_updated() assert 
resource.data_updated is True assert resource.is_marked_data_updated() is True @@ -872,8 +862,8 @@ def test_delete_from_hdx(self, configuration, post_delete): resource.delete_from_hdx() def test_update_yaml(self, configuration, static_yaml): - resource_data = copy.deepcopy(TestResource.resource_data) - resource = Resource(resource_data) + resource_data_copy = copy.deepcopy(resource_data) + resource = Resource(resource_data_copy) assert resource["name"] == "MyResource1" assert resource.get_format() == "xlsx" resource.update_from_yaml(static_yaml) @@ -881,8 +871,8 @@ def test_update_yaml(self, configuration, static_yaml): assert resource.get_format() == "csv" def test_update_json(self, configuration, static_json): - resource_data = copy.deepcopy(TestResource.resource_data) - resource = Resource(resource_data) + resource_data_copy = copy.deepcopy(resource_data) + resource = Resource(resource_data_copy) assert resource["name"] == "MyResource1" assert resource.get_format() == "xlsx" resource.update_from_json(static_json) @@ -898,8 +888,8 @@ def test_patch(self, configuration, post_patch): assert resource["id"] == "de6549d8-268b-4dfe-adaf-a4ae5c8510d5" def test_get_dataset(self, configuration, post_dataset): - resource_data = copy.deepcopy(TestResource.resource_data) - resource = Resource(resource_data) + resource_data_copy = copy.deepcopy(resource_data) + resource = Resource(resource_data_copy) dataset = resource.get_dataset() assert dataset["id"] == "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d" del resource["package_id"] diff --git a/tests/hdx/data/test_update_dataset_resources.py b/tests/hdx/data/test_update_dataset_resources.py index aa0e783..0e3fb67 100644 --- a/tests/hdx/data/test_update_dataset_resources.py +++ b/tests/hdx/data/test_update_dataset_resources.py @@ -130,6 +130,10 @@ def test_dataset_update_resources( ("QuickCharts-SDG 4 Global and Thematic data", "csv"), ] dataset._prepare_hdx_call(dataset.old_data, {}) + assert ( + dataset["updated_by_script"] + == "HDX 
Scraper: UNESCO (2022-12-19T12:51:30.579185)" + ) results = dataset._revise_dataset( tuple(), resources_to_update,