From 8060553c088e431ff201b1070432d16e9266ccf1 Mon Sep 17 00:00:00 2001 From: Sam Scully Date: Wed, 4 Sep 2024 17:02:59 +0100 Subject: [PATCH] Opencage CLI adapted from batch.py example script (#54) * Initial release of 'opencage' CLI tool * prepare version 3.0.0 release --------- Co-authored-by: marc tobias --- .github/workflows/build.yml | 1 - Changes.txt | 6 +- README.md | 38 +++- examples/batch.py | 4 +- opencage/batch.py | 207 +++++++++++++++++++++ opencage/command_line.py | 118 ++++++++++++ opencage/geocoder.py | 12 +- opencage/version.py | 2 +- pytest.ini | 2 + setup.py | 22 ++- test/cli/test_cli_args.py | 139 ++++++++++++++ test/cli/test_cli_run.py | 163 ++++++++++++++++ test/fixtures/cli/forward.csv | 3 + test/fixtures/cli/forward_noresult.csv | 2 + test/fixtures/cli/forward_with_headers.csv | 4 + test/fixtures/cli/reverse.csv | 1 + test/fixtures/cli/reverse_with_errors.csv | 1 + test/fixtures/input.txt | 0 test/test_error_ratelimit_exceeded.py | 4 - tox.ini | 3 +- vagrant-provision.sh | 2 +- 21 files changed, 709 insertions(+), 25 deletions(-) create mode 100644 opencage/batch.py create mode 100644 opencage/command_line.py create mode 100644 pytest.ini create mode 100644 test/cli/test_cli_args.py create mode 100644 test/cli/test_cli_run.py create mode 100644 test/fixtures/cli/forward.csv create mode 100644 test/fixtures/cli/forward_noresult.csv create mode 100644 test/fixtures/cli/forward_with_headers.csv create mode 100644 test/fixtures/cli/reverse.csv create mode 100644 test/fixtures/cli/reverse_with_errors.csv create mode 100644 test/fixtures/input.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 446a546..d16f925 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,7 +18,6 @@ jobs: - "3.10" - "3.9" - "3.8" - - "3.7" os: - ubuntu-latest steps: diff --git a/Changes.txt b/Changes.txt index 8fdc526..de3b7ff 100644 --- a/Changes.txt +++ b/Changes.txt @@ -1,7 +1,11 @@ -unreleased +v3.0.0 Wed Sep 4 
2024 + Requires python 3.8 and asyncio package + Initial release of the 'opencage' CLI tool + RateLimitExceededError no longer prints reset date Batch example: warn if no API key present earlier Batch example: some errors were not printed, e.g. invalid API key Batch example: Check latest version of opencage package is used + Add python 3.12, no longer test against python 3.7 v2.3.1 Wed Nov 15 2023 New error 'SSLError' which is more explicit in case of SSL certificate chain issues diff --git a/README.md b/README.md index 9b90b67..0b51433 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,14 @@ A Python module to access the [OpenCage Geocoding API](https://opencagedata.com/ You can find a [comprehensive tutorial for using this module on the OpenCage site](https://opencagedata.com/tutorials/geocode-in-python). -There are also two brief video tutorials on YouTube, one [covering forward geocoding](https://www.youtube.com/watch?v=9bXu8-LPr5c), one [covering reverse geocoding](https://www.youtube.com/watch?v=u-kkE4yA-z0). +There are two brief video tutorials on YouTube, one [covering forward geocoding](https://www.youtube.com/watch?v=9bXu8-LPr5c), one [covering reverse geocoding](https://www.youtube.com/watch?v=u-kkE4yA-z0). + +The module installs an `opencage` CLI tool for geocoding files. Check `opencage --help` or the [CLI tutorial](https://opencagedata.com/tutorials/geocode-in-cli). + ## Usage -Supports Python 3.7 or newer. Use the older opencage 1.x releases if you need Python 2.7 support. +Supports Python 3.8 or newer. Starting with version 3.0, opencage depends on the asyncio package. Install the module: @@ -87,7 +90,7 @@ with OpenCageGeocode(key) as geocoder: You can run requests in parallel with the `geocode_async` and `reverse_geocode_async` method which have the same parameters and response as their synronous counterparts. -You will need at least Python 3.7 and the `asyncio` and `aiohttp` packages installed. 
+You will need at least Python 3.8 and the `asyncio` and `aiohttp` packages installed. ```python async with OpenCageGeocode(key) as geocoder: @@ -109,7 +112,34 @@ geocoder = OpenCageGeocode('your-api-key', 'http') ### Command-line batch geocoding -See `examples/batch.py` for an example to geocode a CSV file. +Use `opencage forward` or `opencage reverse` + +``` +opencage forward --help + + -h, --help show this help message and exit + --api-key API_KEY Your OpenCage API key + --input INPUT Input file name + --output OUTPUT Output file name + --headers If the first row should be treated as a header row + --input-columns INPUT_COLUMNS + Comma-separated list of integers (default '1') + --add-columns ADD_COLUMNS + Comma-separated list of output columns + --workers WORKERS Number of parallel geocoding requests (default 1) + --timeout TIMEOUT Timeout in seconds (default 10) + --retries RETRIES Number of retries (default 5) + --api-domain API_DOMAIN + API domain (default api.opencagedata.com) + --extra-params EXTRA_PARAMS + Extra parameters for each request (e.g. language=fr,no_dedupe=1) + --limit LIMIT Stop after this number of lines in the input + --dry-run Read the input file but no geocoding + --no-progress Display no progress bar + --quiet No progress bar and no messages + --overwrite Delete the output file first if it exists + --verbose Display debug information for each request +``` diff --git a/examples/batch.py b/examples/batch.py index 138ed5d..96271b2 100755 --- a/examples/batch.py +++ b/examples/batch.py @@ -3,7 +3,7 @@ # Background tutorial on async programming with Python # https://realpython.com/async-io-python/ -# Requires Python 3.7 or newer. Tested with 3.8/3.9/3.10/3.11. +# Requires Python 3.8 or newer. Tested with 3.8/3.9/3.10/3.11. 
# Installation: # pip3 install --upgrade opencage asyncio aiohttp backoff tqdm @@ -213,7 +213,7 @@ async def run_worker(worker_name, queue): async def main(): global PROGRESS_BAR - assert sys.version_info >= (3, 7), "Script requires Python 3.7 or newer" + assert sys.version_info >= (3, 8), "Script requires Python 3.8 or newer" ## 1. Read CSV into a Queue ## Each work_item is an address and id. The id will be part of the output, diff --git a/opencage/batch.py b/opencage/batch.py new file mode 100644 index 0000000..2fb7ad1 --- /dev/null +++ b/opencage/batch.py @@ -0,0 +1,207 @@ +import sys +import ssl +import asyncio +import traceback +import threading +import backoff +import certifi +import random + +from tqdm import tqdm +from urllib.parse import urlencode +from contextlib import suppress +from opencage.geocoder import OpenCageGeocode, OpenCageGeocodeError + +class OpenCageBatchGeocoder(): + def __init__(self, options): + self.options = options + self.sslcontext = ssl.create_default_context(cafile=certifi.where()) + self.write_counter = 1 + + def __call__(self, *args, **kwargs): + asyncio.run(self.geocode(*args, **kwargs)) + + async def geocode(self, input, output): + if not self.options.dry_run: + test = await self.test_request() + if test['error']: + self.log(test['error']) + return + + if self.options.headers: + header_columns = next(input, None) + if header_columns is None: + return + + queue = asyncio.Queue(maxsize=self.options.limit) + + await self.read_input(input, queue) + + if self.options.dry_run: + return + + if self.options.headers: + output.writerow(header_columns + self.options.add_columns) + + progress_bar = not (self.options.no_progress or self.options.quiet) and \ + tqdm(total=queue.qsize(), position=0, desc="Addresses geocoded", dynamic_ncols=True) + + tasks = [] + for _ in range(self.options.workers): + task = asyncio.create_task(self.worker(output, queue, progress_bar)) + tasks.append(task) + + # This starts the workers and waits until all are 
finished + await queue.join() + + # All tasks done + for task in tasks: + task.cancel() + + if progress_bar: + progress_bar.close() + + async def test_request(self): + try: + async with OpenCageGeocode(self.options.api_key, domain=self.options.api_domain, sslcontext=self.sslcontext) as geocoder: + result = await geocoder.geocode_async('Kendall Sq, Cambridge, MA', raw_response=True) + + free = False + with suppress(KeyError): + free = result['rate']['limit'] == 2500 + + return { 'error': None, 'free': free } + except Exception as exc: + return { 'error': exc } + + async def read_input(self, input, queue): + for index, row in enumerate(input): + line_number = index + 1 + + if len(row) == 0: + raise Exception(f"Empty line in input file at line number {line_number}, aborting") + + item = await self.read_one_line(row, line_number) + await queue.put(item) + + if queue.full(): + break + + async def read_one_line(self, row, row_id): + if self.options.command == 'reverse': + input_columns = [1, 2] + elif self.options.input_columns: + input_columns = self.options.input_columns + else: + input_columns = None + + if input_columns: + address = [] + try: + for column in input_columns: + # input_columns option uses 1-based indexing + address.append(row[column - 1]) + except IndexError: + self.log(f"Missing input column {column} in {row}") + else: + address = row + + if self.options.command == 'reverse' and len(address) != 2: + self.log(f"Expected two comma-separated values for reverse geocoding, got {address}") + + return { 'row_id': row_id, 'address': ','.join(address), 'original_columns': row } + + async def worker(self, output, queue, progress): + while True: + item = await queue.get() + + try: + await self.geocode_one_address(output, item['row_id'], item['address'], item['original_columns']) + + if progress: + progress.update(1) + except Exception as exc: + traceback.print_exception(type(exc), exc, exc.__traceback__, file=sys.stderr)  # three-argument form also works on Python 3.8/3.9 + finally: + queue.task_done() + + async def geocode_one_address(self, 
output, row_id, address, original_columns): + def on_backoff(details): + if not self.options.quiet: + sys.stderr.write("Backing off {wait:0.1f} seconds after {tries} tries " + "calling function {target} with args {args} and kwargs " + "{kwargs}\n".format(**details)) + + @backoff.on_exception(backoff.expo, + asyncio.TimeoutError, + max_time=self.options.timeout, + max_tries=self.options.retries, + on_backoff=on_backoff) + async def _geocode_one_address(): + async with OpenCageGeocode(self.options.api_key, domain=self.options.api_domain, sslcontext=self.sslcontext) as geocoder: + geocoding_results = None + params = { 'no_annotations': 1, **self.options.extra_params } + + try: + if self.options.command == 'reverse': + lat, lng = address.split(',')  # same order as reverse_geocode(lat, lng) + geocoding_results = await geocoder.reverse_geocode_async(lat, lng, **params) + else: + geocoding_results = await geocoder.geocode_async(address, **params) + except OpenCageGeocodeError as exc: + self.log(str(exc)) + except Exception as exc: + traceback.print_exception(type(exc), exc, exc.__traceback__, file=sys.stderr) + + try: + if geocoding_results is not None and len(geocoding_results): + geocoding_result = geocoding_results[0] + else: + geocoding_result = None + + if self.options.verbose: + self.log({ + 'row_id': row_id, + 'thread_id': threading.get_native_id(), + 'request': geocoder.url + '?' 
+ urlencode(geocoder._parse_request(address, params)), + 'response': geocoding_result + }) + + await self.write_one_geocoding_result(output, row_id, address, geocoding_result, original_columns) + except Exception as exc: + traceback.print_exception(type(exc), exc, exc.__traceback__, file=sys.stderr) + + await _geocode_one_address() + + async def write_one_geocoding_result(self, output, row_id, address, geocoding_result, original_columns=None): + row = list(original_columns or [])  # copy: never mutate the caller's list or a shared default + + for column in self.options.add_columns: + if geocoding_result is None: + row.append('') + elif column in geocoding_result: + row.append(geocoding_result[column]) + elif column in geocoding_result['components']: + row.append(geocoding_result['components'][column]) + elif column in geocoding_result['geometry']: + row.append(geocoding_result['geometry'][column]) + else: + row.append('') + + # Enforce that row are written ordered. That means we might wait for other threads + # to finish a task and make the overall process slower. Alternative would be to + # use a second queue, or keep some results in memory. 
+ while row_id > self.write_counter: + if self.options.verbose: + self.log(f"Want to write row {row_id}, but write_counter is at {self.write_counter}") + await asyncio.sleep(random.uniform(0.01, 0.1)) + + if self.options.verbose: + self.log(f"Writing row {row_id}") + output.writerow(row) + self.write_counter = self.write_counter + 1 + + def log(self, message): + if not self.options.quiet: + sys.stderr.write(f"{message}\n") + diff --git a/opencage/command_line.py b/opencage/command_line.py new file mode 100644 index 0000000..3345103 --- /dev/null +++ b/opencage/command_line.py @@ -0,0 +1,118 @@ +import argparse +import sys +import os +import io +import re +import csv + +from opencage.batch import OpenCageBatchGeocoder +from opencage.version import __version__ + +def main(args=sys.argv[1:]): + options = parse_args(args) + + assert sys.version_info >= (3, 8), "Script requires Python 3.8 or newer" + + geocoder = OpenCageBatchGeocoder(options) + + with options.input as input: + output_io = io.StringIO() if options.dry_run else open(options.output, 'x', encoding='utf-8') + reader = csv.reader(input, strict=True, skipinitialspace=True) + writer = csv.writer(output_io) + + geocoder(input=reader, output=writer) + output_io.close() + +def parse_args(args): + if len(args) == 0: + print("To display help use 'opencage -h', 'opencage forward -h' or 'opencage reverse -h'", file=sys.stderr) + sys.exit(1) + + parser = argparse.ArgumentParser(description=f'Opencage CLI {__version__}') + parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}') + + subparsers = parser.add_subparsers(dest='command') + subparsers.required = True + + subparser_forward = subparsers.add_parser('forward', help="Forward geocode a file (input is address, add coordinates)") + subparser_reverse = subparsers.add_parser('reverse', help="Reverse geocode a file (input is coordinates, add full address)") + + for subparser in [subparser_forward, subparser_reverse]: + 
subparser.add_argument("--api-key", required=True, type=api_key_type, help="Your OpenCage API key") + subparser.add_argument("--input", required=True, type=argparse.FileType('r', encoding='utf-8'), help="Input file name") + subparser.add_argument("--output", required=True, type=str, help="Output file name") + + add_optional_arguments(subparser) + + options = parser.parse_args(args) + + if os.path.exists(options.output) and not options.dry_run: + if options.overwrite: + os.remove(options.output) + else: + print(f"Error: The output file '{options.output}' already exists. You can add --overwrite to your command.", file=sys.stderr) + sys.exit(1) + + return options + + +def add_optional_arguments(parser): + parser.add_argument("--headers", action="store_true", help="If the first row should be treated as a header row") + default_input_cols = '1,2' if re.match(r'.*reverse', parser.prog) else '1' + parser.add_argument("--input-columns", type=comma_separated_type(int), default=default_input_cols, help=f"Comma-separated list of integers (default '{default_input_cols}')") + parser.add_argument("--add-columns", type=comma_separated_type(str), default="lat,lng,_type,_category,country_code,country,state,county,_normalized_city,postcode,road,house_number,confidence,formatted", help="Comma-separated list of output columns") + parser.add_argument("--workers", type=ranged_type(int, 1, 20), default=1, help="Number of parallel geocoding requests (default 1)") + parser.add_argument("--timeout", type=ranged_type(int, 1, 60), default=10, help="Timeout in seconds (default 10)") + parser.add_argument("--retries", type=ranged_type(int, 1, 60), default=10, help="Number of retries (default 10)") + parser.add_argument("--api-domain", type=str, default="api.opencagedata.com", help="API domain (default api.opencagedata.com)") + parser.add_argument("--extra-params", type=comma_separated_dict_type, default="", help="Extra parameters for each request (e.g. 
language=fr,no_dedupe=1)") + parser.add_argument("--limit", type=int, default=0, help="Stop after this number of lines in the input") + parser.add_argument("--dry-run", action="store_true", help="Read the input file but no geocoding") + parser.add_argument("--no-progress", action="store_true", help="Display no progress bar") + parser.add_argument("--quiet", action="store_true", help="No progress bar and no messages") + parser.add_argument("--overwrite", action="store_true", help="Delete the output file first if it exists") + parser.add_argument("--verbose", action="store_true", help="Display debug information for each request") + + return parser + +def api_key_type(apikey): + pattern = re.compile(r"^(oc_gc_)?[0-9a-f]{32}$") + + if not pattern.match(apikey): + raise argparse.ArgumentTypeError("invalid API key") + + return apikey + + +def ranged_type(value_type, min_value, max_value): + def range_checker(arg: str): + try: + f = value_type(arg) + except ValueError: + raise argparse.ArgumentTypeError(f'must be a valid {value_type}') + if f < min_value or f > max_value: + raise argparse.ArgumentTypeError(f'must be within [{min_value}, {max_value}]') + return f + + # Return function handle to checking function + return range_checker + + +def comma_separated_type(value_type): + def comma_separated(arg: str): + if not arg: + return [] + + return [value_type(x) for x in arg.split(',')] + + return comma_separated + + +def comma_separated_dict_type(arg): + if not arg: + return {} + + try: + return dict([x.split('=') for x in arg.split(',')]) + except ValueError: + raise argparse.ArgumentTypeError("must be a valid comma separated list of key=value pairs") diff --git a/opencage/geocoder.py b/opencage/geocoder.py index 4ce6ea4..d3b650b 100644 --- a/opencage/geocoder.py +++ b/opencage/geocoder.py @@ -180,10 +180,14 @@ def geocode(self, query, **kwargs): if self.session and isinstance(self.session, aiohttp.client.ClientSession): raise AioHttpError("Cannot use `geocode` in an async 
context, use `geocode_async`.") + raw_response = kwargs.pop('raw_response', False) request = self._parse_request(query, kwargs) response = self._opencage_request(request) - return floatify_latlng(response['results']) + if raw_response: + return response + else: + return floatify_latlng(response['results']) async def geocode_async(self, query, **kwargs): """ @@ -209,10 +213,14 @@ async def geocode_async(self, query, **kwargs): if not isinstance(self.session, aiohttp.client.ClientSession): raise AioHttpError("You must use `geocode_async` in an async context.") + raw_response = kwargs.pop('raw_response', False) request = self._parse_request(query, kwargs) response = await self._opencage_async_request(request) - return floatify_latlng(response['results']) + if raw_response: + return response + else: + return floatify_latlng(response['results']) def reverse_geocode(self, lat, lng, **kwargs): """ diff --git a/opencage/version.py b/opencage/version.py index 1c4ddd3..4eb28e3 100644 --- a/opencage/version.py +++ b/opencage/version.py @@ -1 +1 @@ -__version__ = '2.3.1' +__version__ = '3.0.0' diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..79d7b5a --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_default_fixture_loop_scope = session diff --git a/setup.py b/setup.py index cb7e4fa..7769049 100644 --- a/setup.py +++ b/setup.py @@ -13,10 +13,10 @@ ROOT_DIR = os.path.dirname(__file__) SOURCE_DIR = os.path.join(ROOT_DIR) -if sys.version_info < (3, 6): +if sys.version_info < (3, 8): raise RuntimeError( - "opencage requires Python 3.7 or newer" - "Use older opencage 1.x for Python 2.7 or 3.6" + "opencage requires Python 3.8 or newer. " + "Use older opencage 1.x for Python 2.7 or 3.7" ) # try for testing @@ -28,15 +28,20 @@ setup( name="opencage", - version="2.3.1", + version="3.0.0", description="Wrapper module for the OpenCage Geocoder API", long_description=LONG_DESCRIPTION, long_description_content_type='text/markdown', author="OpenCage 
GmbH", author_email="info@opencagedata.com", url="https://github.com/OpenCageData/python-opencage-geocoder/", - download_url="https://github.com/OpenCageData/python-opencage-geocoder/tarball/2.1.0", + download_url="https://github.com/OpenCageData/python-opencage-geocoder/tarball/3.0.0", license="BSD", + entry_points={ + 'console_scripts': [ + 'opencage=opencage.command_line:main' + ] + }, packages=find_packages(), include_package_data=True, zip_safe=False, @@ -48,17 +53,20 @@ 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', "Programming Language :: Python :: 3 :: Only", - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Topic :: Scientific/Engineering :: GIS', 'Topic :: Utilities' ], install_requires=[ 'Requests>=2.31.0', - 'backoff>=2.2.1' + 'backoff>=2.2.1', + 'tqdm>=4.66.4', + 'certifi>=2024.07.04', + 'aiohttp>=3.10.5' ], test_suite='pytest', tests_require=[ diff --git a/test/cli/test_cli_args.py b/test/cli/test_cli_args.py new file mode 100644 index 0000000..f8e5bb7 --- /dev/null +++ b/test/cli/test_cli_args.py @@ -0,0 +1,139 @@ +import pathlib +import pytest +from opencage.version import __version__ + +from opencage.command_line import parse_args + +@pytest.fixture(autouse=True) +def around(): + yield + try: + pathlib.Path("test/fixtures/output.csv").unlink() + except FileNotFoundError: + pass + +def assert_parse_args_error(args, message, capfd): + with pytest.raises(SystemExit): + parse_args(args) + + _, err = capfd.readouterr() + assert message in err + +def test_required_arguments(capfd): + assert_parse_args_error( + [], + 'To display help use', + capfd + ) + +def test_invalid_command(capfd): + assert_parse_args_error( + [ + "singasong" + ], + 'argument command: invalid choice', + capfd + ) + +def test_version(capfd): + 
with pytest.raises(SystemExit): + parse_args(['--version']) + out, err = capfd.readouterr() + + assert __version__ in out + +def test_invalid_api_key(capfd): + assert_parse_args_error( + [ + "forward", + "--api-key", "invalid", + "--input", "test/fixtures/input.txt", + "--output", "test/fixtures/output.csv" + ], + 'invalid API key', + capfd + ) + +def test_existing_output_file(capfd): + assert_parse_args_error( + [ + "forward", + "--api-key", "oc_gc_12345678901234567890123456789012", + "--input", "test/fixtures/input.txt", + "--output", "test/fixtures/input.txt" + ], + 'already exists', + capfd + ) + +def test_argument_range(capfd): + assert_parse_args_error( + [ + "forward", + "--api-key", "oc_gc_12345678901234567890123456789012", + "--input", "test/fixtures/input.txt", + "--output", "test/fixtures/output.csv", + "--workers", "200" + ], + 'must be within [1, 20]', + capfd + ) + +def test_full_argument_list(): + args = parse_args([ + "reverse", + "--api-key", "oc_gc_12345678901234567890123456789012", + "--input", "test/fixtures/input.txt", + "--output", "test/fixtures/output.csv", + "--headers", + "--input-columns", "1,2", + "--add-columns", "city,postcode", + "--limit", "4", + "--workers", "3", + "--timeout", "2", + "--retries", "1", + "--dry-run", + "--api-domain", "bulk.opencagedata.com", + "--extra-params", "extra=1", + "--no-progress", + "--quiet" + ]) + + assert args.command == "reverse" + assert args.api_key == "oc_gc_12345678901234567890123456789012" + assert args.input.name == "test/fixtures/input.txt" + assert args.output == "test/fixtures/output.csv" + assert args.headers is True + assert args.input_columns == [1, 2] + assert args.add_columns == ["city", "postcode"] + assert args.limit == 4 + assert args.workers == 3 + assert args.timeout == 2 + assert args.retries == 1 + assert args.dry_run is True + assert args.api_domain == "bulk.opencagedata.com" + assert args.extra_params == { "extra": "1" } + assert args.no_progress is True + assert args.quiet is 
True + +def test_defaults(): + args = parse_args([ + "forward", + "--api-key", "12345678901234567890123456789012", + "--input", "test/fixtures/input.txt", + "--output", "test/fixtures/output.csv" + ]) + + assert args.command == "forward" + assert args.limit == 0 + assert args.headers is False + assert args.input_columns == [1] + assert args.add_columns == ["lat", "lng", "_type", "_category", "country_code", "country", "state", "county", "_normalized_city", "postcode", "road", "house_number", "confidence", "formatted"] + assert args.workers == 1 + assert args.timeout == 10 + assert args.retries == 10 + assert args.dry_run is False + assert args.api_domain == "api.opencagedata.com" + assert args.extra_params == {} + assert args.no_progress is False + assert args.quiet is False diff --git a/test/cli/test_cli_run.py b/test/cli/test_cli_run.py new file mode 100644 index 0000000..4b52b6c --- /dev/null +++ b/test/cli/test_cli_run.py @@ -0,0 +1,163 @@ +import pathlib +import pytest +import os +import sys + +from opencage.command_line import main + +# NOTE: Testing keys https://opencagedata.com/api#testingkeys +TEST_APIKEY_200 = '6d0e711d72d74daeb2b0bfd2a5cdfdba' # always returns same address +TEST_APIKEY_401 = '11111111111111111111111111111111' # invalid key + +@pytest.fixture(autouse=True) +def around(): + yield + try: + pathlib.Path("test/fixtures/cli/output.csv").unlink() + except FileNotFoundError: + pass + +def assert_output(path, length, lines): + assert pathlib.Path(path).exists() + + with open(path, "r", encoding="utf-8") as f: + actual = f.readlines() + # print(actual, file=sys.stderr) + assert len(actual) == length + + for i, expected in enumerate(lines): + assert actual[i].strip() == expected + +def test_forward(): + main([ + "forward", + "--api-key", TEST_APIKEY_200, + "--input", "test/fixtures/cli/forward.csv", + "--output", "test/fixtures/cli/output.csv", + "--input-columns", "2,3,4", + "--add-columns", "country_code,country,postcode,city" + ]) + + 
assert_output( + path="test/fixtures/cli/output.csv", + length=3, + lines=['Rathausmarkt 1,Hamburg,20095,Germany,de,Germany,48153,Münster'] + ) + +def test_reverse(): + main([ + "reverse", + "--api-key", TEST_APIKEY_200, + "--input", "test/fixtures/cli/reverse.csv", + "--output", "test/fixtures/cli/output.csv", + "--add-columns", "country_code,country,postcode" + ]) + + assert_output( + path="test/fixtures/cli/output.csv", + length=1, + lines=['51.9526622,7.6324709,de,Germany,48153'] + ) + +def test_headers(): + main([ + "forward", + "--api-key", TEST_APIKEY_200, + "--input", "test/fixtures/cli/forward_with_headers.csv", + "--output", "test/fixtures/cli/output.csv", + "--input-columns", "1,2,3,4", + "--headers", + "--add-columns", "lat,lng,postcode" + ]) + + assert_output( + path="test/fixtures/cli/output.csv", + length=4, + lines=[ + 'street and number,town,postcode,country,lat,lng,postcode', + 'Rathausmarkt 1,Hamburg,20095,Germany,51.9526622,7.6324709,48153' + ] + ) + +def test_input_errors(capfd): + main([ + "reverse", + "--api-key", TEST_APIKEY_200, + "--input", "test/fixtures/cli/reverse_with_errors.csv", + "--output", "test/fixtures/cli/output.csv", + "--no-progress" + ]) + + _, err = capfd.readouterr() + assert 'Missing input column 2 in' in err + assert 'Expected two comma-separated values' in err + +def test_empty_result(capfd): + # 'NOWHERE-INTERESTING' is guaranteed to return no result + # https://opencagedata.com/api#testingkeys + main([ + "forward", + "--api-key", TEST_APIKEY_200, + "--input", "test/fixtures/cli/forward_noresult.csv", + "--output", "test/fixtures/cli/output.csv", + "--input-columns", "2", + "--headers", + "--verbose", + "--add-columns", "lat,lng,postcode" + ]) + + assert_output( + path="test/fixtures/cli/output.csv", + length=2, + lines=[ + 'id,full_address,lat,lng,postcode', + '123,NOWHERE-INTERESTING,,,' + ] + ) + + +def test_invalid_api_key(capfd): + main([ + "forward", + "--api-key", TEST_APIKEY_401, + "--input", 
"test/fixtures/cli/forward_with_headers.csv", + "--output", "test/fixtures/cli/output.csv" + ]) + + _, err = capfd.readouterr() + assert 'Your API key is not authorized' in err + +def test_dryrun(capfd): + main([ + "forward", + "--api-key", TEST_APIKEY_200, + "--input", "test/fixtures/cli/forward_with_headers.csv", + "--output", "test/fixtures/cli/output.csv", + "--dry-run" + ]) + + assert not os.path.isfile("test/fixtures/cli/output.csv") + +def test_invalid_domain(capfd): + main([ + "forward", + "--api-key", TEST_APIKEY_200, + "--input", "test/fixtures/cli/forward.csv", + "--output", "test/fixtures/cli/output.csv", + "--api-domain", "invalid73585348.opencagedata.com" + ]) + + _, err = capfd.readouterr() + assert 'Cannot connect to host' in err + + # with dry-run no request will be made + main([ + "forward", + "--api-key", TEST_APIKEY_200, + "--input", "test/fixtures/cli/forward.csv", + "--output", "test/fixtures/cli/output.csv", + "--api-domain", "invalid73585348.opencagedata.com", + "--dry-run" + ]) + _, err = capfd.readouterr() + assert err == '' diff --git a/test/fixtures/cli/forward.csv b/test/fixtures/cli/forward.csv new file mode 100644 index 0000000..64283bd --- /dev/null +++ b/test/fixtures/cli/forward.csv @@ -0,0 +1,3 @@ +"Rathausmarkt 1",Hamburg,20095,Germany +"10 Downing Street",London,"SW1A 2AA","United Kingdom" +"C/ de Mallorca 401",Barcelona,08013,Spain \ No newline at end of file diff --git a/test/fixtures/cli/forward_noresult.csv b/test/fixtures/cli/forward_noresult.csv new file mode 100644 index 0000000..1d4dceb --- /dev/null +++ b/test/fixtures/cli/forward_noresult.csv @@ -0,0 +1,2 @@ +id,full_address +123,NOWHERE-INTERESTING \ No newline at end of file diff --git a/test/fixtures/cli/forward_with_headers.csv b/test/fixtures/cli/forward_with_headers.csv new file mode 100644 index 0000000..feaa784 --- /dev/null +++ b/test/fixtures/cli/forward_with_headers.csv @@ -0,0 +1,4 @@ +street and number,town,postcode,country +"Rathausmarkt 
1",Hamburg,20095,Germany +"10 Downing Street",London,"SW1A 2AA","United Kingdom" +"C/ de Mallorca 401",Barcelona,08013,Spain \ No newline at end of file diff --git a/test/fixtures/cli/reverse.csv b/test/fixtures/cli/reverse.csv new file mode 100644 index 0000000..95ab1fd --- /dev/null +++ b/test/fixtures/cli/reverse.csv @@ -0,0 +1 @@ +51.9526622,7.6324709 \ No newline at end of file diff --git a/test/fixtures/cli/reverse_with_errors.csv b/test/fixtures/cli/reverse_with_errors.csv new file mode 100644 index 0000000..c6bf1c2 --- /dev/null +++ b/test/fixtures/cli/reverse_with_errors.csv @@ -0,0 +1 @@ +50.101010 \ No newline at end of file diff --git a/test/fixtures/input.txt b/test/fixtures/input.txt new file mode 100644 index 0000000..e69de29 diff --git a/test/test_error_ratelimit_exceeded.py b/test/test_error_ratelimit_exceeded.py index 28d9f39..627f95b 100644 --- a/test/test_error_ratelimit_exceeded.py +++ b/test/test_error_ratelimit_exceeded.py @@ -1,6 +1,5 @@ from pathlib import Path -import datetime import httpretty import pytest @@ -39,6 +38,3 @@ def test_rate_limit_exceeded(): with pytest.raises(RateLimitExceededError) as excinfo: geocoder.geocode("whatever") assert 'You have used the requests available on your plan.' in str(excinfo.value) - # 'It will reset to 2500 on 2021-03-08T00:00:00' - # assert excinfo.value.reset_to == 2500 - # assert excinfo.value.reset_time == datetime.datetime(2021, 3, 8, 0, 0) diff --git a/tox.ini b/tox.ini index bd9ee7e..5c1cc05 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py37,py38,py39,py310,py311,py312,lint +envlist = py38,py39,py310,py311,py312,lint [gh] python = @@ -8,7 +8,6 @@ python = 3.10 = py310 3.9 = py39 3.8 = py38 - 3.7 = py37 [testenv] deps = diff --git a/vagrant-provision.sh b/vagrant-provision.sh index b61c360..7182c21 100755 --- a/vagrant-provision.sh +++ b/vagrant-provision.sh @@ -26,7 +26,7 @@ exec $SHELL # 'exec $SHELL' doesn't work well in a provision file. 
Likely you need to # run the following commands manually after 'vagrant up' -for VERSION in 3.7 3.8 3.9 3.10 3.11; do +for VERSION in 3.8 3.9 3.10 3.11; do VERSION=$(pyenv latest --known $VERSION) echo "Installing $VERSION ..." pyenv install --skip-existing $VERSION