Skip to content

Commit

Permalink
Python bindings: add a osgeo.gdal_fsspec module that on import will r…
Browse files Browse the repository at this point in the history
…egister GDAL VSI file system handlers as fsspec AbstractFileSystem
  • Loading branch information
rouault committed Oct 12, 2024
1 parent 0dede5a commit 2210300
Show file tree
Hide file tree
Showing 11 changed files with 582 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cmake_builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ jobs:
cfitsio freexl geotiff libjpeg-turbo libpq libspatialite libwebp-base pcre pcre2 postgresql \
sqlite tiledb zstd cryptopp cgal doxygen librttopo libkml openssl xz \
openjdk ant qhull armadillo blas blas-devel libblas libcblas liblapack liblapacke blosc libarchive \
arrow-cpp pyarrow libaec libheif libavif cmake
arrow-cpp pyarrow libaec libheif libavif cmake fsspec
- name: Check CMake version
shell: bash -l {0}
run: |
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/ubuntu_24.04/Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,5 @@ RUN python3 -m pip install -U --break-system-packages -r /tmp/requirements.txt
# cfchecker requires udunits2
RUN apt-get install -y --allow-unauthenticated libudunits2-0 libudunits2-data
RUN python3 -m pip install --break-system-packages cfchecker

RUN python3 -m pip install --break-system-packages fsspec
9 changes: 6 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ repos:
- id: black
exclude: >
(?x)^(
swig/python/osgeo/|
swig/python/osgeo/__init__.py|
swig/python/osgeo/gdalnumeric.py|
autotest/ogr/data/
)
- repo: https://github.com/timothycrosley/isort
Expand All @@ -14,7 +15,8 @@ repos:
- id: isort
exclude: >
(?x)^(
swig/python/osgeo/|
swig/python/osgeo/__init__.py|
swig/python/osgeo/gdalnumeric.py|
autotest/ogr/data/
)
- repo: https://github.com/pycqa/flake8
Expand All @@ -23,7 +25,8 @@ repos:
- id: flake8
exclude: >
(?x)^(
swig/python/osgeo/|
swig/python/osgeo/__init__.py|
swig/python/osgeo/gdalnumeric.py|
examples/|
autotest/ogr/data/
)
Expand Down
229 changes: 229 additions & 0 deletions autotest/gcore/test_gdal_fsspec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
#!/usr/bin/env pytest
# -*- coding: utf-8 -*-
###############################################################################
# Project: GDAL/OGR Test Suite
# Purpose: Test gdal_fsspec module
# Author: Even Rouault <even dot rouault at spatialys.com>
#
###############################################################################
# Copyright (c) 2024, Even Rouault <even dot rouault at spatialys.com>
#
# SPDX-License-Identifier: MIT
###############################################################################

import pytest

from osgeo import gdal

fsspec = pytest.importorskip("fsspec")
pytest.importorskip("fsspec.spec")

from osgeo import gdal_fsspec # NOQA


def test_gdal_fsspec_open_read():
    """Check that reading through ``vsi://`` yields as many bytes as GDAL reports."""

    with fsspec.open("vsi://data/byte.tif") as stream:
        assert len(stream.read()) == gdal.VSIStatL("data/byte.tif").size


def test_gdal_fsspec_info_file():
    """Check the dictionary returned by ``info()`` for a regular file."""

    expected_without_volatile_keys = {
        "name": "data/byte.tif",
        "size": 736,
        "type": "file",
    }

    details = fsspec.filesystem("vsi").info("data/byte.tif")

    # The modification time is environment dependent: only check its presence.
    assert "mtime" in details
    details.pop("mtime")

    # 32768 == 0o100000, i.e. the "regular file" bit of a POSIX st_mode.
    assert (details["mode"] & 32768) != 0
    details.pop("mode")

    assert details == expected_without_volatile_keys


def test_gdal_fsspec_info_dir():
    """Check the dictionary returned by ``info()`` for a directory."""

    expected_without_mode = {
        "name": "data",
        "size": 0,
        "type": "directory",
    }

    details = fsspec.filesystem("vsi").info("data")

    # 16384 == 0o40000, i.e. the "directory" bit of a POSIX st_mode.
    assert (details["mode"] & 16384) != 0
    details.pop("mode")

    assert details == expected_without_mode


def test_gdal_fsspec_info_error():
    """``info()`` on a non-existing path must raise ``FileNotFoundError``."""

    vsi_fs = fsspec.filesystem("vsi")
    missing_path = "/i/do/not/exist"
    with pytest.raises(FileNotFoundError):
        vsi_fs.info(missing_path)


def test_gdal_fsspec_ls():
    """Check that listing ``data`` reports ``data/byte.tif`` with the expected details."""

    listing = fsspec.filesystem("vsi").ls("data")
    assert len(listing) > 2

    # Pick the first entry describing data/byte.tif (None if there is none).
    byte_tif_entry = next(
        (entry for entry in listing if entry["name"] == "data/byte.tif"), None
    )
    assert byte_tif_entry

    # The modification time is environment dependent: only check its presence.
    assert "mtime" in byte_tif_entry
    byte_tif_entry.pop("mtime")

    expected_entry = {
        "name": "data/byte.tif",
        "size": 736,
        "type": "file",
    }
    assert byte_tif_entry == expected_entry


def test_gdal_fsspec_ls_file():
    """``ls()`` on a file path returns a one-element list holding that path."""

    file_path = "data/byte.tif"
    listing = fsspec.filesystem("vsi").ls(file_path)
    assert listing == ["data/byte.tif"]


def test_gdal_fsspec_ls_error():
    """``ls()`` on a non-existing path must raise ``FileNotFoundError``."""

    vsi_fs = fsspec.filesystem("vsi")
    missing_url = "vsi://i/do/not/exist"
    with pytest.raises(FileNotFoundError):
        vsi_fs.ls(missing_url)


def test_gdal_fsspec_modified():
    """``modified()`` on an existing file returns a ``datetime.datetime``."""

    import datetime

    timestamp = fsspec.filesystem("vsi").modified("data/byte.tif")
    assert timestamp is not None
    assert isinstance(timestamp, datetime.datetime)


def test_gdal_fsspec_modified_error():
    """``modified()`` on a non-existing path must raise ``FileNotFoundError``."""

    vsi_fs = fsspec.filesystem("vsi")
    missing_url = "vsi://i/do/not/exist"
    with pytest.raises(FileNotFoundError):
        vsi_fs.modified(missing_url)


def test_gdal_fsspec_rm():
    """A file written through the ``vsimem`` protocol can be removed with ``rm()``."""

    with fsspec.open("vsimem:///foo.bin", "wb") as stream:
        stream.write(b"bar")

    mem_fs = fsspec.filesystem("vsimem")
    target = "/foo.bin"

    # Must not raise: the file exists at this point.
    mem_fs.info(target)

    mem_fs.rm(target)

    # Once removed, the file can no longer be stat'ed.
    with pytest.raises(FileNotFoundError):
        mem_fs.info(target)


def test_gdal_fsspec_rm_error():
    """``rm()`` on a non-existing file must raise ``FileNotFoundError``."""

    mem_fs = fsspec.filesystem("vsimem")
    missing_file = "/foo.bin"
    with pytest.raises(FileNotFoundError):
        mem_fs.rm(missing_file)


def test_gdal_fsspec_copy():
    """``copy()`` duplicates a file and leaves the source in place."""

    with fsspec.open("vsimem://foo.bin", "wb") as stream:
        stream.write(b"bar")

    mem_fs = fsspec.filesystem("vsimem")
    source, destination = "/foo.bin", "/bar.bin"

    mem_fs.copy(source, destination)

    # Both files must hold the 3 bytes that were written.
    assert mem_fs.info(destination)["size"] == 3
    assert mem_fs.info(source)["size"] == 3

    # Clean up.
    mem_fs.rm(source)
    mem_fs.rm(destination)


def test_gdal_fsspec_copy_error():
    """``copy()`` from a non-existing source must raise ``FileNotFoundError``."""

    mem_fs = fsspec.filesystem("vsimem")
    source, destination = "/foo.bin", "/bar.bin"
    with pytest.raises(FileNotFoundError):
        mem_fs.copy(source, destination)


def test_gdal_fsspec_mv():
    """``mv()`` creates the destination file and removes the source."""

    with fsspec.open("vsimem://foo.bin", "wb") as stream:
        stream.write(b"bar")

    mem_fs = fsspec.filesystem("vsimem")
    source, destination = "/foo.bin", "/bar.bin"

    mem_fs.mv(source, destination)

    # The destination holds the 3 bytes that were written...
    assert mem_fs.info(destination)["size"] == 3
    # ... and the source is gone.
    with pytest.raises(FileNotFoundError):
        mem_fs.info(source)

    # Clean up.
    mem_fs.rm(destination)


def test_gdal_fsspec_mv_error():
    """``mv()`` from a non-existing source must raise ``FileNotFoundError``."""

    mem_fs = fsspec.filesystem("vsimem")
    source, destination = "/foo.bin", "/bar.bin"
    with pytest.raises(FileNotFoundError):
        mem_fs.mv(source, destination)


def test_gdal_fsspec_mkdir(tmp_path):
    """Exercise ``mkdir()`` / ``rmdir()`` on a single directory and on a nested one."""

    vsi_fs = fsspec.filesystem("vsi")

    parent_dir = f"{tmp_path}/my_dir"
    child_dir = f"{parent_dir}/my_subdir"

    # Single directory: created once, a second creation is an error.
    vsi_fs.mkdir(parent_dir)
    assert vsi_fs.info(parent_dir)["type"] == "directory"
    with pytest.raises(FileExistsError):
        vsi_fs.mkdir(parent_dir)
    vsi_fs.rmdir(parent_dir)

    # Nested directory: with the default arguments the missing parent is
    # created as well.
    vsi_fs.mkdir(child_dir)
    for created_dir in (parent_dir, child_dir):
        assert vsi_fs.info(created_dir)["type"] == "directory"
    vsi_fs.rmdir(child_dir)
    vsi_fs.rmdir(parent_dir)
    with pytest.raises(FileNotFoundError):
        vsi_fs.info(parent_dir)

    # Nested directory with create_parents=False: must fail and must not
    # leave the parent directory behind.
    vsi_fs = fsspec.filesystem("vsi")
    with pytest.raises(Exception):
        vsi_fs.mkdir(child_dir, create_parents=False)
    with pytest.raises(FileNotFoundError):
        vsi_fs.info(parent_dir)


def test_gdal_fsspec_makedirs(tmp_path):
    """Exercise ``makedirs()`` with and without ``exist_ok``."""

    vsi_fs = fsspec.filesystem("vsi")
    new_dir = f"{tmp_path}/my_dir"

    vsi_fs.makedirs(new_dir)
    assert vsi_fs.info(new_dir)["type"] == "directory"

    # Re-creating an existing directory is an error...
    with pytest.raises(FileExistsError):
        vsi_fs.makedirs(new_dir)
    # ... unless the caller explicitly tolerates it.
    vsi_fs.makedirs(new_dir, exist_ok=True)

    vsi_fs.rmdir(new_dir)


def test_gdal_fsspec_usable_by_pyarrow_dataset(tmp_vsimem):
    """Check that the ``vsimem`` fsspec filesystem can be used by ``pyarrow.dataset``.

    A Parquet file is copied into the ``tmp_vsimem`` directory, then opened as
    a pyarrow dataset, first by file path and then by directory path.
    The test is skipped when ``pyarrow.dataset`` cannot be imported.
    """

    ds = pytest.importorskip("pyarrow.dataset")

    tmp_vsimem_file = str(tmp_vsimem / "tmp.parquet")
    # Use a context manager so the source file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("../ogr/data/parquet/test.parquet", "rb") as src:
        gdal.FileFromMemBuffer(tmp_vsimem_file, src.read())

    fs_vsimem = fsspec.filesystem("vsimem")

    # Paths handed to the filesystem have their "/vsimem" prefix stripped.
    assert (
        ds.dataset(tmp_vsimem_file[len("/vsimem") :], filesystem=fs_vsimem) is not None
    )

    assert (
        ds.dataset(str(tmp_vsimem)[len("/vsimem") :], filesystem=fs_vsimem) is not None
    )
1 change: 1 addition & 0 deletions doc/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# This file may be used to create an environment using:
# $ pip install --upgrade -r <this file>
fsspec
numpy
sphinx
breathe
Expand Down
20 changes: 20 additions & 0 deletions doc/source/api/python/general.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,26 @@ Error Handling
File Management
---------------

osgeo.gdal_fsspec module
++++++++++++++++++++++++

.. automodule:: osgeo.gdal_fsspec
:members:
:undoc-members:
:show-inheritance:
:noindex:

osgeo.gdal.VSIFile class
++++++++++++++++++++++++

.. autoclass:: osgeo.gdal.VSIFile
:members:
:undoc-members:
:noindex:

Low level functions
+++++++++++++++++++

.. autofunction:: osgeo.gdal.CloseDir

.. autofunction:: osgeo.gdal.CopyFile
Expand Down
12 changes: 12 additions & 0 deletions doc/source/api/python/osgeo.gdal_fsspec.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
..
The documentation displayed on this page is automatically generated from
Python docstrings. See https://gdal.org/development/dev_documentation.html
for information on updating this content.
osgeo.gdal_fsspec module
========================

.. automodule:: osgeo.gdal_fsspec
:members:
:undoc-members:
:show-inheritance:
1 change: 1 addition & 0 deletions doc/source/api/python/osgeo.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Submodules

osgeo.gdal
osgeo.gdal_array
osgeo.gdal_fsspec
osgeo.gdalconst
osgeo.gnm
osgeo.ogr
Expand Down
8 changes: 8 additions & 0 deletions swig/include/cpl.i
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,14 @@ void CopyFileRestartable(const char* pszSource,

}

/* Expose wrapper_MoveFile() to the bindings under the name MoveFile(). */
%rename (MoveFile) wrapper_MoveFile;
%inline {
/* Thin wrapper taking (source, target) and forwarding to CPLMoveFile(),
 * whose integer return value is passed through unchanged. */
int wrapper_MoveFile(const char* pszSource, const char* pszTarget)
{
/* NOTE(review): the arguments are deliberately passed target-first;
 * CPLMoveFile() is understood to be declared as (new path, old path)
 * in cpl_conv.h -- confirm against the header. */
return CPLMoveFile(pszTarget, pszSource);
}
}

%clear (const char* pszSource);
%clear (const char* pszTarget);

Expand Down
10 changes: 10 additions & 0 deletions swig/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ set(GDAL_PYTHON_CSOURCES
list(APPEND GDAL_PYTHON_PYSOURCES "${CMAKE_CURRENT_BINARY_DIR}/osgeo/__init__.py")
endif()

# Only when the binary directory differs from the source directory:
# copy osgeo/gdal_fsspec.py from the source tree into the osgeo/ directory of
# the binary tree, and register the copy in GDAL_PYTHON_PYSOURCES.
if (NOT "${CMAKE_BINARY_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}")
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdal_fsspec.py"
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/osgeo/gdal_fsspec.py" "${CMAKE_CURRENT_BINARY_DIR}/osgeo"
# Re-run the copy whenever the source file changes.
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/osgeo/gdal_fsspec.py")
list(APPEND GDAL_PYTHON_PYSOURCES "${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdal_fsspec.py")
endif()

foreach(_file IN ITEMS ${GDAL_PYTHON_CSOURCES})
add_custom_command(
OUTPUT ${_file}
Expand Down Expand Up @@ -544,6 +552,7 @@ elseif (Python_Development_FOUND)
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdal.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdalconst.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdalnumeric.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdal_fsspec.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gnm.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/ogr.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/osr.py
Expand All @@ -554,6 +563,7 @@ elseif (Python_Development_FOUND)
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdal.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdalconst.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdalnumeric.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gdal_fsspec.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/gnm.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/ogr.py
${CMAKE_CURRENT_BINARY_DIR}/osgeo/osr.py
Expand Down
Loading

0 comments on commit 2210300

Please sign in to comment.