From 0b07ea7db3c95b57e779a4ef0c59d0b08bf9f552 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sat, 12 Oct 2024 15:10:11 +0200 Subject: [PATCH] Python bindings: simplify previous commit to have a single 'gdalvsi' protocol --- autotest/gcore/test_gdal_fsspec.py | 86 ++++++++++++++---------------- doc/source/spelling_wordlist.txt | 1 + swig/python/osgeo/gdal_fsspec.py | 84 +++++------------------------ 3 files changed, 56 insertions(+), 115 deletions(-) diff --git a/autotest/gcore/test_gdal_fsspec.py b/autotest/gcore/test_gdal_fsspec.py index ac227a57db10..38deb8da07be 100644 --- a/autotest/gcore/test_gdal_fsspec.py +++ b/autotest/gcore/test_gdal_fsspec.py @@ -23,13 +23,13 @@ def test_gdal_fsspec_open_read(): - with fsspec.open("vsi://data/byte.tif") as f: + with fsspec.open("gdalvsi://data/byte.tif") as f: assert len(f.read()) == gdal.VSIStatL("data/byte.tif").size def test_gdal_fsspec_info_file(): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") info = fs.info("data/byte.tif") assert "mtime" in info del info["mtime"] @@ -44,7 +44,7 @@ def test_gdal_fsspec_info_file(): def test_gdal_fsspec_info_dir(): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") info = fs.info("data") assert (info["mode"] & 16384) != 0 del info["mode"] @@ -57,14 +57,14 @@ def test_gdal_fsspec_info_dir(): def test_gdal_fsspec_info_error(): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") with pytest.raises(FileNotFoundError): fs.info("/i/do/not/exist") def test_gdal_fsspec_ls(): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") ret = fs.ls("data") assert len(ret) > 2 item_of_interest = None @@ -84,21 +84,21 @@ def test_gdal_fsspec_ls(): def test_gdal_fsspec_ls_file(): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") ret = fs.ls("data/byte.tif") assert ret == ["data/byte.tif"] def test_gdal_fsspec_ls_error(): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") with 
pytest.raises(FileNotFoundError): - fs.ls("vsi://i/do/not/exist") + fs.ls("gdalvsi://i/do/not/exist") def test_gdal_fsspec_modified(): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") modified = fs.modified("data/byte.tif") assert modified is not None import datetime @@ -108,70 +108,70 @@ def test_gdal_fsspec_modified(): def test_gdal_fsspec_modified_error(): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") with pytest.raises(FileNotFoundError): - fs.modified("vsi://i/do/not/exist") + fs.modified("gdalvsi://i/do/not/exist") def test_gdal_fsspec_rm(): - with fsspec.open("vsimem:///foo.bin", "wb") as f: + with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f: f.write(b"""bar""") - fs = fsspec.filesystem("vsimem") - fs.info("/foo.bin") - fs.rm("/foo.bin") + fs = fsspec.filesystem("gdalvsi") + fs.info("/vsimem/foo.bin") + fs.rm("/vsimem/foo.bin") with pytest.raises(FileNotFoundError): - fs.info("/foo.bin") + fs.info("/vsimem/foo.bin") def test_gdal_fsspec_rm_error(): - fs = fsspec.filesystem("vsimem") + fs = fsspec.filesystem("gdalvsi") with pytest.raises(FileNotFoundError): - fs.rm("/foo.bin") + fs.rm("/vsimem/foo.bin") def test_gdal_fsspec_copy(): - with fsspec.open("vsimem://foo.bin", "wb") as f: + with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f: f.write(b"""bar""") - fs = fsspec.filesystem("vsimem") - fs.copy("/foo.bin", "/bar.bin") - assert fs.info("/bar.bin")["size"] == 3 - assert fs.info("/foo.bin")["size"] == 3 - fs.rm("/foo.bin") - fs.rm("/bar.bin") + fs = fsspec.filesystem("gdalvsi") + fs.copy("/vsimem/foo.bin", "/vsimem/bar.bin") + assert fs.info("/vsimem/bar.bin")["size"] == 3 + assert fs.info("/vsimem/foo.bin")["size"] == 3 + fs.rm("/vsimem/foo.bin") + fs.rm("/vsimem/bar.bin") def test_gdal_fsspec_copy_error(): - fs = fsspec.filesystem("vsimem") + fs = fsspec.filesystem("gdalvsi") with pytest.raises(FileNotFoundError): - fs.copy("/foo.bin", "/bar.bin") + fs.copy("/vsimem/foo.bin", "/vsimem/bar.bin") def 
test_gdal_fsspec_mv(): - with fsspec.open("vsimem://foo.bin", "wb") as f: + with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f: f.write(b"""bar""") - fs = fsspec.filesystem("vsimem") - fs.mv("/foo.bin", "/bar.bin") - assert fs.info("/bar.bin")["size"] == 3 + fs = fsspec.filesystem("gdalvsi") + fs.mv("/vsimem/foo.bin", "/vsimem/bar.bin") + assert fs.info("/vsimem/bar.bin")["size"] == 3 with pytest.raises(FileNotFoundError): - fs.info("/foo.bin") - fs.rm("/bar.bin") + fs.info("/vsimem/foo.bin") + fs.rm("/vsimem/bar.bin") def test_gdal_fsspec_mv_error(): - fs = fsspec.filesystem("vsimem") + fs = fsspec.filesystem("gdalvsi") with pytest.raises(FileNotFoundError): - fs.mv("/foo.bin", "/bar.bin") + fs.mv("/vsimem/foo.bin", "/bar.bin") def test_gdal_fsspec_mkdir(tmp_path): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") my_path = str(tmp_path) + "/my_dir" @@ -189,7 +189,7 @@ def test_gdal_fsspec_mkdir(tmp_path): with pytest.raises(FileNotFoundError): fs.info(my_path) - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") with pytest.raises(Exception): fs.mkdir(my_path + "/my_subdir", create_parents=False) with pytest.raises(FileNotFoundError): @@ -198,7 +198,7 @@ def test_gdal_fsspec_mkdir(tmp_path): def test_gdal_fsspec_makedirs(tmp_path): - fs = fsspec.filesystem("vsi") + fs = fsspec.filesystem("gdalvsi") my_path = str(tmp_path) + "/my_dir" fs.makedirs(my_path) @@ -218,12 +218,8 @@ def test_gdal_fsspec_usable_by_pyarrow_dataset(tmp_vsimem): tmp_vsimem_file, open("../ogr/data/parquet/test.parquet", "rb").read() ) - fs_vsimem = fsspec.filesystem("vsimem") + fs_vsimem = fsspec.filesystem("gdalvsi") - assert ( - ds.dataset(tmp_vsimem_file[len("/vsimem") :], filesystem=fs_vsimem) is not None - ) + assert ds.dataset(tmp_vsimem_file, filesystem=fs_vsimem) is not None - assert ( - ds.dataset(str(tmp_vsimem)[len("/vsimem") :], filesystem=fs_vsimem) is not None - ) + assert ds.dataset(str(tmp_vsimem), filesystem=fs_vsimem) is not None diff 
--git a/doc/source/spelling_wordlist.txt b/doc/source/spelling_wordlist.txt index 4c4ea2d861af..484b6fd812ea 100644 --- a/doc/source/spelling_wordlist.txt +++ b/doc/source/spelling_wordlist.txt @@ -1018,6 +1018,7 @@ GDALThreadLocalDatasetCache gdaltindex gdaltransform gdalvirtualmem +gdalvsi gdalwarp gdalwmscache gdb diff --git a/swig/python/osgeo/gdal_fsspec.py b/swig/python/osgeo/gdal_fsspec.py index 5debc910ec36..5861422095dc 100644 --- a/swig/python/osgeo/gdal_fsspec.py +++ b/swig/python/osgeo/gdal_fsspec.py @@ -1,29 +1,18 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2024, Even Rouault -"""Module exposing GDAL Virtual File Systems (VSI) as fsspec implementations. +"""Module exposing GDAL Virtual File Systems (VSI) as a "gdalvsi" fsspec implementation. Importing "osgeo.gdal_fsspec" requires the Python "fsspec" (https://filesystem-spec.readthedocs.io/en/latest/) module to be available. - A generic "vsi" fsspec protocol is available. All GDAL VSI file names must be - simply prefixed with "vsi://". For example: + A generic "gdalvsi" fsspec protocol is available. All GDAL VSI file names must be + simply prefixed with "gdalvsi://". For example: - - "vsi://data/byte.tif" to access relative file "data/byte.tif" - - "vsi:///home/user/byte.tif" to access absolute file "/home/user/byte.tif" - - "vsi:///vsimem/byte.tif" (note the 3 slashes) to access VSIMem file "/vsimem/byte.tif" - - Each VSI file system is also registered as a distinct fsspec protocol, such - as "vsimem", "vsicurl", "vsizip", "vsitar", etc. 
- - Examples: - - - "vsimem://byte.tif" to access file "/vsimem/byte.tif" - - "vsicurl://http://example.com/foo" to access file "/vsicurl/http://example.com/foo" - - "vsis3://my_bucket/byte.tif" to access file "/vsis3/my_bucket/byte.tif" - - "vsizip:///home/user/my.zip/foo.tif" (note the 3 slashes to indicate absolute path) - to access (absolute) file "/vsizip//home/user/my.zip/foo.tif" - - "vsizip://my.zip/foo.tif" to access (relative) file "/vsizip/my.zip/foo.tif" + - "gdalvsi://data/byte.tif" to access relative file "data/byte.tif" + - "gdalvsi:///home/user/byte.tif" to access absolute file "/home/user/byte.tif" + - "gdalvsi:///vsimem/byte.tif" (note the 3 slashes) to access VSIMem file "/vsimem/byte.tif" + - "gdalvsi:///vsicurl/https://example.com/byte.tif" (note the 3 slashes) to access "https://example.com/byte.tif" through /vsicurl/ :since: GDAL 3.11 """ @@ -44,52 +33,19 @@ class VSIFileSystem(AbstractFileSystem): def _get_gdal_path(cls, path): """Return a GDAL compatible file from a fsspec file name. - For the file system using the generic "vsi" protocol, - remove the leading vsi:// if found (normally, it should be there, + Remove the leading vsi:// if found (normally, it should be there, but most AbstractFileSystem implementations seem to be ready to remove it if found) - - For specialized file systems, like vsimem://, etc., for an input - like "vsimem:///foo", return "/vsimem/foo". And for an input like - "/foo" also return "/vsimem/foo". 
""" if isinstance(path, PurePath): path = stringify_path(path) - if cls.protocol == "vsi": - # "vsi://something" just becomes "something" - if path.startswith("vsi://"): - return path[len("vsi://") :] - - return path + # "vsi://something" just becomes "something" + if path.startswith("vsi://"): + return path[len("vsi://") :] - else: - list_protocols_that_need_leeding_slash = [ - "vsis3", - "vsigs", - "vsiaz", - "vsioss", - "vsiswift", - ] - list_protocols_that_need_leeding_slash += [ - item + "_streaming" for item in list_protocols_that_need_leeding_slash - ] - list_protocols_that_need_leeding_slash.append("vsimem") - - # Deal with paths like "vsis3://foo" - full_protocol = cls.protocol + "://" - if path.startswith(full_protocol): - path = path[len(full_protocol) :] - - # Deal with paths like "/foo" with a VSIFileSystem that is something like "vsis3" - if ( - cls.protocol in list_protocols_that_need_leeding_slash - and not path.startswith("/") - ): - path = "/" + path - - return "/" + cls.protocol + path + return path def _open( self, @@ -271,22 +227,10 @@ def copy( def register_vsi_implementations(): - """Register a generic "vsi" protocol and "vsimem", "vsitar", etc. + """Register a generic "gdalvsi" protocol. This method is automatically called on osgeo.gdal_fsspec import. """ - register_implementation("vsi", VSIFileSystem) - for vsi_prefix in gdal.GetFileSystemsPrefixes(): - if vsi_prefix.startswith("/vsi") and not vsi_prefix.endswith("?"): - assert vsi_prefix.endswith("/") - protocol = vsi_prefix[1:-1] - # We need to duplicate the base class for each protocol, so that - # each class has a distinct "protocol" member. - new_class = type( - "VSIFileSystem_" + protocol, - VSIFileSystem.__bases__, - dict(VSIFileSystem.__dict__), - ) - register_implementation(protocol, new_class) + register_implementation("gdalvsi", VSIFileSystem) register_vsi_implementations()