diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 39007f4..f113aac 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -9,13 +9,13 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get history and tags for versioning to work run: | git fetch --prune --unshallow git fetch --depth=1 origin +refs/tags/*:refs/tags/* - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies diff --git a/.github/workflows/run-python-tests.yaml b/.github/workflows/run-python-tests.yaml index 7e3ec06..ec3ccd5 100644 --- a/.github/workflows/run-python-tests.yaml +++ b/.github/workflows/run-python-tests.yaml @@ -18,9 +18,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies diff --git a/requirements.txt b/requirements.txt index b43bab7..73343ec 100755 --- a/requirements.txt +++ b/requirements.txt @@ -81,7 +81,7 @@ openpyxl==3.1.2 # via hdx-python-utilities (pyproject.toml) packaging==24.0 # via pytest -petl==1.7.14 +petl==1.7.15 # via frictionless platformdirs==4.2.0 # via virtualenv @@ -89,7 +89,7 @@ pluggy==1.4.0 # via pytest pre-commit==3.6.2 # via hdx-python-utilities (pyproject.toml) -pydantic==2.6.3 +pydantic==2.6.4 # via frictionless pydantic-core==2.16.3 # via pydantic diff --git a/src/hdx/utilities/downloader.py b/src/hdx/utilities/downloader.py index f98dbc1..04010d2 100755 --- a/src/hdx/utilities/downloader.py +++ b/src/hdx/utilities/downloader.py @@ -42,6 +42,8 @@ class Download(BaseDownload): auth (Tuple[str, str]): Authorisation information in tuple form (user, pass) OR basic_auth (str): Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) + bearer_token (str): Bearer token string OR + bearer_token_file (str): Path to file containing bearer token string OR extra_params_dict (Dict[str, str]): Extra parameters to put on end of url as a dictionary OR extra_params_json (str): Path to JSON file containing extra parameters to put on end of url OR extra_params_yaml (str): Path to YAML file containing extra parameters to put on end of url @@ -304,6 +306,22 @@ def normal_setup( ) from e return self.response + def set_bearer_token(self, bearer_token: str) -> None: + """Set bearer token + + Args: + bearer_token (str): Bearer token + + Returns: + None + """ + self.session.headers.update( + { + "Accept": "application/json", + "Authorization": f"Bearer {bearer_token}", + } + ) + def hash_stream(self, url: str) -> str: """Stream file from url and hash it using MD5. Must call setup method first. @@ -1265,6 +1283,8 @@ def generate_downloaders( auth (Tuple[str, str]): Authorisation information in tuple form (user, pass) OR basic_auth (str): Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) + bearer_token (str): Bearer token string OR + bearer_token_file (str): Path to file containing bearer token string OR extra_params_dict (Dict[str, str]): Extra parameters to put on end of url as a dictionary OR extra_params_json (str): Path to JSON file containing extra parameters to put on end of url OR extra_params_yaml (str): Path to YAML file containing extra parameters to put on end of url diff --git a/src/hdx/utilities/retriever.py b/src/hdx/utilities/retriever.py index 48e2b66..9d8a292 100644 --- a/src/hdx/utilities/retriever.py +++ b/src/hdx/utilities/retriever.py @@ -166,6 +166,17 @@ def get_filename( filename = slugify(f"{filename}{extension}") return f"{prefix}{filename}{first_ext}", kwargs + def set_bearer_token(self, bearer_token: str) -> None: + """Set bearer token in downloader + + Args: + bearer_token (str): Bearer token + + Returns: + None + """ + self.downloader.set_bearer_token(bearer_token) + def download_file( self, url: str, diff --git a/src/hdx/utilities/session.py b/src/hdx/utilities/session.py index e2f220f..2ce6268 100755 --- a/src/hdx/utilities/session.py +++ b/src/hdx/utilities/session.py @@ -33,8 +33,9 @@ def get_session( ) -> requests.Session: """Set up and return Session object that is set up with retrying. Requires either global user agent to be set or appropriate user agent parameter(s) - to be completed. If the EXTRA_PARAMS or BASIC_AUTH environment variable is - supplied, the extra_params* parameters will be ignored. + to be completed. If the EXTRA_PARAMS, BASIC_AUTH or BEARER_TOKEN + environment variable is supplied, the extra_params* parameters will be + ignored. Args: user_agent (Optional[str]): User agent string. HDXPythonUtilities/X.X.X- is prefixed. @@ -48,7 +49,9 @@ def get_session( **kwargs: See below auth (Tuple[str, str]): Authorisation information in tuple form (user, pass) OR basic_auth (str): Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR - basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) + basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR + bearer_token (str): Bearer token string OR + bearer_token_file (str): Path to file containing bearer token string OR extra_params_dict (Dict): Extra parameters to put on end of url as a dictionary OR extra_params_json (str): Path to JSON file containing extra parameters to put on end of url OR extra_params_yaml (str): Path to YAML file containing extra parameters to put on end of url @@ -76,11 +79,16 @@ def get_session( extra_params_found = False extra_params_dict = None basic_auth = None + bearer_token = None if use_env: basic_auth_env = os.getenv("BASIC_AUTH") if basic_auth_env: basic_auth = basic_auth_env - auths_found.append("basic_auth environment variable") + auths_found.append("BASIC_AUTH environment variable") + bearer_token_env = os.getenv("BEARER_TOKEN") + if bearer_token_env: + bearer_token = bearer_token_env + auths_found.append("BEARER_TOKEN environment variable") extra_params = os.getenv("EXTRA_PARAMS") if extra_params: if "=" in extra_params: @@ -133,10 +141,15 @@ def get_session( ) if extra_params_dict: basic_auth_param = extra_params_dict.get("basic_auth") + bearer_token_param = extra_params_dict.get("bearer_token") if basic_auth_param: basic_auth = basic_auth_param auths_found.append("basic_auth parameter") del extra_params_dict["basic_auth"] + if bearer_token_param: + bearer_token = bearer_token_param + auths_found.append("bearer_token parameter") + del extra_params_dict["bearer_token"] s.params = extra_params_dict @@ -144,6 +157,10 @@ def get_session( if basic_auth_arg: basic_auth = basic_auth_arg auths_found.append("basic_auth argument") + bearer_token_arg = kwargs.get("bearer_token") + if bearer_token_arg: + bearer_token = bearer_token_arg + auths_found.append("bearer_token argument") auth = kwargs.get("auth") if auth: @@ -157,6 +174,15 @@ def get_session( except OSError: if fail_on_missing_file: raise + bearer_token_file = kwargs.get("bearer_token_file") + if bearer_token_file: + logger.info(f"Loading bearer token from: {bearer_token_file}") + try: + bearer_token = load_text(bearer_token_file, strip=True) + auths_found.append(f"file {bearer_token_file}") + except OSError: + if fail_on_missing_file: + raise if len(auths_found) > 1: auths_found_str = ", ".join(auths_found) raise SessionError( @@ -165,7 +191,16 @@ def get_session( if "headers" not in auths_found: if basic_auth: auth = basicauth_decode(basic_auth) - s.auth = auth + s.auth = auth + elif bearer_token: + s.headers.update( + { + "Accept": "application/json", + "Authorization": f"Bearer {bearer_token}", + } + ) + else: + s.auth = auth status_forcelist = kwargs.get( "status_forcelist", (429, 500, 502, 503, 504) diff --git a/tests/fixtures/downloader/bearertoken.txt b/tests/fixtures/downloader/bearertoken.txt new file mode 100644 index 0000000..e56e15b --- /dev/null +++ b/tests/fixtures/downloader/bearertoken.txt @@ -0,0 +1 @@ +12345 diff --git a/tests/hdx/utilities/test_downloader.py b/tests/hdx/utilities/test_downloader.py index 21e1168..759c326 100755 --- a/tests/hdx/utilities/test_downloader.py +++ b/tests/hdx/utilities/test_downloader.py @@ -114,14 +114,29 @@ def test_get_path_for_url( assert abspath(path) == abspath(join(downloaderfolder, filename)) def test_init(self, monkeypatch, downloaderfolder): - basicauthfile = join(downloaderfolder, "basicauth.txt") with Download(auth=("u", "p")) as downloader: assert downloader.session.auth == ("u", "p") basicauth = "Basic dXNlcjpwYXNz" with Download(basic_auth=basicauth) as downloader: assert downloader.session.auth == ("user", "pass") + basicauthfile = join(downloaderfolder, "basicauth.txt") with Download(basic_auth_file=basicauthfile) as downloader: assert downloader.session.auth == ("testuser", "testpass") + bearertoken = "ABCDE" + with Download(bearer_token=bearertoken) as downloader: + assert downloader.session.headers["Accept"] == "application/json" + assert ( + downloader.session.headers["Authorization"] + == f"Bearer {bearertoken}" + ) + bearertokenfile = join(downloaderfolder, "bearertoken.txt") + bearertoken = "12345" + with Download(bearer_token_file=bearertokenfile) as downloader: + assert downloader.session.headers["Accept"] == "application/json" + assert ( + downloader.session.headers["Authorization"] + == f"Bearer {bearertoken}" + ) extraparamsyamltree = join(downloaderfolder, "extra_params_tree.yaml") with Download( extra_params_yaml=extraparamsyamltree, extra_params_lookup="mykey" @@ -130,6 +145,19 @@ def test_init(self, monkeypatch, downloaderfolder): monkeypatch.setenv("BASIC_AUTH", basicauth) with Download() as downloader: assert downloader.session.auth == ("user", "pass") + bearertoken = "98765" + monkeypatch.setenv("BEARER_TOKEN", bearertoken) + with pytest.raises(SessionError): + Download() + monkeypatch.delenv("BASIC_AUTH") + with Download() as downloader: + assert downloader.session.headers["Accept"] == "application/json" + assert ( + downloader.session.headers["Authorization"] + == f"Bearer {bearertoken}" + ) + monkeypatch.delenv("BEARER_TOKEN") + monkeypatch.setenv("BASIC_AUTH", basicauth) with pytest.raises(SessionError): Download(basic_auth="12345") with pytest.raises(SessionError): @@ -173,6 +201,8 @@ def test_init(self, monkeypatch, downloaderfolder): ) with pytest.raises(IOError): Download(basic_auth_file="NOTEXIST") + with pytest.raises(IOError): + Download(bearer_token_file="NOTEXIST") extraparamsyaml = join(downloaderfolder, "extra_params.yaml") test_url = "http://www.lalala.com/lala" with Download( @@ -226,6 +256,22 @@ def test_init(self, monkeypatch, downloaderfolder): assert "param3=11" in full_url assert "basic_auth" not in full_url monkeypatch.delenv("EXTRA_PARAMS") + bearertoken = "ZYXWV" + with Download( + extra_params_dict={"bearer_token": bearertoken} + ) as downloader: + assert downloader.session.headers["Accept"] == "application/json" + assert ( + downloader.session.headers["Authorization"] + == f"Bearer {bearertoken}" + ) + bearertoken = "FGHIJ" + downloader.set_bearer_token(bearertoken) + assert ( + downloader.session.headers["Authorization"] + == f"Bearer {bearertoken}" + ) + with pytest.raises(SessionError): Download( extra_params_dict={"key1": "val1"}, diff --git a/tests/hdx/utilities/test_retriever.py b/tests/hdx/utilities/test_retriever.py index 2cbf17c..2729510 100755 --- a/tests/hdx/utilities/test_retriever.py +++ b/tests/hdx/utilities/test_retriever.py @@ -156,6 +156,29 @@ def test_error(self, dirs, retrieverfolder, fallback_dir): use_saved=True, ) + def test_set_bearer_token(self, dirs, retrieverfolder, fallback_dir): + saved_dir, temp_dir = dirs + bearertoken = "12345" + with Download(bearer_token=bearertoken) as downloader: + assert ( + downloader.session.headers["Authorization"] + == f"Bearer {bearertoken}" + ) + with Retrieve( + downloader, + fallback_dir, + saved_dir, + temp_dir, + save=False, + use_saved=False, + ) as retriever: + bearertoken = "67890" + retriever.set_bearer_token(bearertoken) + assert ( + downloader.session.headers["Authorization"] + == f"Bearer {bearertoken}" + ) + def test_download_nosave(self, dirs, retrieverfolder, fallback_dir): saved_dir, temp_dir = dirs with Download() as downloader: