Skip to content

Commit

Permalink
Merge pull request OSGeo#10985 from rouault/gdal_fsspec
Browse files Browse the repository at this point in the history
Python bindings: add a osgeo.gdal_fsspec module that on import will register GDAL VSI file system handlers as fsspec AbstractFileSystem
  • Loading branch information
rouault authored Oct 29, 2024
2 parents 8fb79c4 + 0b07ea7 commit 81fb43a
Show file tree
Hide file tree
Showing 14 changed files with 624 additions and 84 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cmake_builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ jobs:
cfitsio freexl geotiff libjpeg-turbo libpq libspatialite libwebp-base pcre pcre2 postgresql \
sqlite tiledb zstd cryptopp cgal doxygen librttopo libkml openssl xz \
openjdk ant qhull armadillo blas blas-devel libblas libcblas liblapack liblapacke blosc libarchive \
arrow-cpp pyarrow libaec libheif libavif cmake
arrow-cpp pyarrow libaec libheif libavif cmake fsspec
- name: Check CMake version
shell: bash -l {0}
run: |
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/ubuntu_24.04/Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,5 @@ RUN python3 -m pip install -U --break-system-packages -r /tmp/requirements.txt
# cfchecker requires udunits2
RUN apt-get install -y --allow-unauthenticated libudunits2-0 libudunits2-data
RUN python3 -m pip install --break-system-packages cfchecker

RUN python3 -m pip install --break-system-packages fsspec
9 changes: 6 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ repos:
- id: black
exclude: >
(?x)^(
swig/python/osgeo/|
swig/python/osgeo/__init__.py|
swig/python/osgeo/gdalnumeric.py|
autotest/ogr/data/
)
- repo: https://github.com/timothycrosley/isort
Expand All @@ -14,7 +15,8 @@ repos:
- id: isort
exclude: >
(?x)^(
swig/python/osgeo/|
swig/python/osgeo/__init__.py|
swig/python/osgeo/gdalnumeric.py|
autotest/ogr/data/
)
- repo: https://github.com/pycqa/flake8
Expand All @@ -23,7 +25,8 @@ repos:
- id: flake8
exclude: >
(?x)^(
swig/python/osgeo/|
swig/python/osgeo/__init__.py|
swig/python/osgeo/gdalnumeric.py|
examples/|
autotest/ogr/data/
)
Expand Down
225 changes: 225 additions & 0 deletions autotest/gcore/test_gdal_fsspec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
#!/usr/bin/env pytest
# -*- coding: utf-8 -*-
###############################################################################
# Project: GDAL/OGR Test Suite
# Purpose: Test gdal_fsspec module
# Author: Even Rouault <even dot rouault at spatialys.com>
#
###############################################################################
# Copyright (c) 2024, Even Rouault <even dot rouault at spatialys.com>
#
# SPDX-License-Identifier: MIT
###############################################################################

import pytest

from osgeo import gdal

fsspec = pytest.importorskip("fsspec")
pytest.importorskip("fsspec.spec")

from osgeo import gdal_fsspec # NOQA


def test_gdal_fsspec_open_read():
    """Read a file through the ``gdalvsi://`` protocol and check its length."""

    # Size reported by GDAL itself is the reference value.
    expected_size = gdal.VSIStatL("data/byte.tif").size

    with fsspec.open("gdalvsi://data/byte.tif") as f:
        payload = f.read()

    assert len(payload) == expected_size


def test_gdal_fsspec_info_file():
    """``info()`` on a regular file returns name, size, type, mode and mtime."""

    gdal_fs = fsspec.filesystem("gdalvsi")
    details = gdal_fs.info("data/byte.tif")

    # Only the presence of "mtime" is checked; it is removed before the exact
    # comparison below.
    assert "mtime" in details
    details.pop("mtime")

    # 32768 == stat.S_IFREG, i.e. the "regular file" bit must be set.
    file_mode = details.pop("mode")
    assert (file_mode & 32768) != 0

    assert details == {
        "name": "data/byte.tif",
        "size": 736,
        "type": "file",
    }


def test_gdal_fsspec_info_dir():
    """``info()`` on a directory returns name, a zero size, type and mode."""

    gdal_fs = fsspec.filesystem("gdalvsi")
    details = gdal_fs.info("data")

    # 16384 == stat.S_IFDIR, i.e. the "directory" bit must be set.
    dir_mode = details.pop("mode")
    assert (dir_mode & 16384) != 0

    assert details == {
        "name": "data",
        "size": 0,
        "type": "directory",
    }


def test_gdal_fsspec_info_error():
    """``info()`` on a non-existing path raises ``FileNotFoundError``."""

    gdal_fs = fsspec.filesystem("gdalvsi")

    with pytest.raises(FileNotFoundError):
        gdal_fs.info("/i/do/not/exist")


def test_gdal_fsspec_ls():
    """``ls()`` on a directory returns entry dicts, among them data/byte.tif."""

    gdal_fs = fsspec.filesystem("gdalvsi")
    entries = gdal_fs.ls("data")
    assert len(entries) > 2

    # Pick the first entry describing data/byte.tif (None when absent).
    byte_tif = next(
        (entry for entry in entries if entry["name"] == "data/byte.tif"), None
    )
    assert byte_tif

    # Only the presence of "mtime" is checked; it is removed before the exact
    # comparison below.
    assert "mtime" in byte_tif
    byte_tif.pop("mtime")

    assert byte_tif == {
        "name": "data/byte.tif",
        "size": 736,
        "type": "file",
    }


def test_gdal_fsspec_ls_file():
    """``ls()`` on a file path returns a one-element list holding that path."""

    gdal_fs = fsspec.filesystem("gdalvsi")
    listing = gdal_fs.ls("data/byte.tif")

    assert listing == ["data/byte.tif"]


def test_gdal_fsspec_ls_error():
    """``ls()`` on a non-existing path raises ``FileNotFoundError``."""

    gdal_fs = fsspec.filesystem("gdalvsi")

    # The path is deliberately given with its protocol prefix here.
    with pytest.raises(FileNotFoundError):
        gdal_fs.ls("gdalvsi://i/do/not/exist")


def test_gdal_fsspec_modified():
    """``modified()`` on an existing file returns a ``datetime.datetime``."""

    import datetime

    gdal_fs = fsspec.filesystem("gdalvsi")
    stamp = gdal_fs.modified("data/byte.tif")

    assert stamp is not None
    assert isinstance(stamp, datetime.datetime)


def test_gdal_fsspec_modified_error():
    """``modified()`` on a non-existing path raises ``FileNotFoundError``."""

    gdal_fs = fsspec.filesystem("gdalvsi")

    with pytest.raises(FileNotFoundError):
        gdal_fs.modified("gdalvsi://i/do/not/exist")


def test_gdal_fsspec_rm():
    """A file written through fsspec can be removed with ``rm()``."""

    target = "/vsimem/foo.bin"

    with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
        f.write(b"""bar""")

    gdal_fs = fsspec.filesystem("gdalvsi")

    # info() must succeed before the removal ...
    gdal_fs.info(target)
    gdal_fs.rm(target)

    # ... and fail afterwards.
    with pytest.raises(FileNotFoundError):
        gdal_fs.info(target)


def test_gdal_fsspec_rm_error():
    """``rm()`` on a non-existing file raises ``FileNotFoundError``."""

    gdal_fs = fsspec.filesystem("gdalvsi")

    with pytest.raises(FileNotFoundError):
        gdal_fs.rm("/vsimem/foo.bin")


def test_gdal_fsspec_copy():
    """``copy()`` duplicates a file and leaves the source in place."""

    source, target = "/vsimem/foo.bin", "/vsimem/bar.bin"

    with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
        f.write(b"""bar""")

    gdal_fs = fsspec.filesystem("gdalvsi")
    gdal_fs.copy(source, target)

    # Both files hold the 3 bytes written above.
    assert gdal_fs.info(target)["size"] == 3
    assert gdal_fs.info(source)["size"] == 3

    # Clean up both files.
    gdal_fs.rm(source)
    gdal_fs.rm(target)


def test_gdal_fsspec_copy_error():
    """``copy()`` from a non-existing source raises ``FileNotFoundError``."""

    gdal_fs = fsspec.filesystem("gdalvsi")

    with pytest.raises(FileNotFoundError):
        gdal_fs.copy("/vsimem/foo.bin", "/vsimem/bar.bin")


def test_gdal_fsspec_mv():
    """``mv()`` makes the file available under the new name only."""

    source, target = "/vsimem/foo.bin", "/vsimem/bar.bin"

    with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
        f.write(b"""bar""")

    gdal_fs = fsspec.filesystem("gdalvsi")
    gdal_fs.mv(source, target)

    # The destination holds the 3 bytes written above; the source is gone.
    assert gdal_fs.info(target)["size"] == 3
    with pytest.raises(FileNotFoundError):
        gdal_fs.info(source)

    # Clean up the moved file.
    gdal_fs.rm(target)


def test_gdal_fsspec_mv_error():
    """``mv()`` from a non-existing source raises ``FileNotFoundError``."""

    gdal_fs = fsspec.filesystem("gdalvsi")

    with pytest.raises(FileNotFoundError):
        gdal_fs.mv("/vsimem/foo.bin", "/bar.bin")


def test_gdal_fsspec_mkdir(tmp_path):
    """Exercise ``mkdir()``/``rmdir()``: plain, nested and ``create_parents=False``."""

    gdal_fs = fsspec.filesystem("gdalvsi")

    top_dir = str(tmp_path) + "/my_dir"
    sub_dir = top_dir + "/my_subdir"

    # Plain creation; creating the same directory again must be rejected.
    gdal_fs.mkdir(top_dir)
    assert gdal_fs.info(top_dir)["type"] == "directory"
    with pytest.raises(FileExistsError):
        gdal_fs.mkdir(top_dir)
    gdal_fs.rmdir(top_dir)

    # Creating a nested directory also creates its missing parent.
    gdal_fs.mkdir(sub_dir)
    assert gdal_fs.info(top_dir)["type"] == "directory"
    assert gdal_fs.info(sub_dir)["type"] == "directory"
    gdal_fs.rmdir(sub_dir)
    gdal_fs.rmdir(top_dir)
    with pytest.raises(FileNotFoundError):
        gdal_fs.info(top_dir)

    # With create_parents=False the call fails and the parent is not created.
    gdal_fs = fsspec.filesystem("gdalvsi")
    with pytest.raises(Exception):
        gdal_fs.mkdir(sub_dir, create_parents=False)
    with pytest.raises(FileNotFoundError):
        gdal_fs.info(top_dir)


def test_gdal_fsspec_makedirs(tmp_path):
    """``makedirs()`` creates a directory and honours ``exist_ok``."""

    gdal_fs = fsspec.filesystem("gdalvsi")
    new_dir = str(tmp_path) + "/my_dir"

    gdal_fs.makedirs(new_dir)
    assert gdal_fs.info(new_dir)["type"] == "directory"

    # An existing directory is an error unless exist_ok=True is passed.
    with pytest.raises(FileExistsError):
        gdal_fs.makedirs(new_dir)
    gdal_fs.makedirs(new_dir, exist_ok=True)

    gdal_fs.rmdir(new_dir)


def test_gdal_fsspec_usable_by_pyarrow_dataset(tmp_vsimem):
    """Check that the ``gdalvsi`` filesystem can be handed to ``pyarrow.dataset``.

    A Parquet test file is copied into the directory provided by the
    ``tmp_vsimem`` fixture, then opened through ``pyarrow.dataset.dataset()``
    both as a single file and through its parent directory. The test is
    skipped when ``pyarrow.dataset`` cannot be imported.
    """

    ds = pytest.importorskip("pyarrow.dataset")

    tmp_vsimem_file = str(tmp_vsimem / "tmp.parquet")

    # Read the source file inside a ``with`` block so that its handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("../ogr/data/parquet/test.parquet", "rb") as src:
        gdal.FileFromMemBuffer(tmp_vsimem_file, src.read())

    fs_vsimem = fsspec.filesystem("gdalvsi")

    # Single-file dataset.
    assert ds.dataset(tmp_vsimem_file, filesystem=fs_vsimem) is not None

    # Directory dataset.
    assert ds.dataset(str(tmp_vsimem), filesystem=fs_vsimem) is not None
81 changes: 1 addition & 80 deletions autotest/pymod/gdaltest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2102,87 +2102,8 @@ def reopen(ds, update=False, open_options=None):
)


# VSIFile helper class


class VSIFile:
    """Minimal file-like wrapper around a handle opened with ``gdal.VSIFOpenExL``.

    The object is a context manager (closing the handle on exit) and an
    iterator over lines. Text mode encodes/decodes with ``encoding``; binary
    mode is selected when ``mode`` contains ``"b"``.

    :param path: path passed to ``gdal.VSIFOpenExL``
    :param mode: open mode passed to ``gdal.VSIFOpenExL``
    :param encoding: encoding used by ``read()`` and ``write()`` in text mode
    :raises OSError: if the file cannot be opened
    """

    def __init__(self, path, mode, encoding="utf-8"):
        self._path = path
        self._mode = mode

        self._binary = "b" in mode
        self._encoding = encoding

        self._fp = gdal.VSIFOpenExL(self._path, self._mode, True)
        if self._fp is None:
            raise OSError(gdal.VSIGetLastErrorMsg())

        self._closed = False

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line as given by ``gdal.CPLReadLineL``.

        In binary mode the line is converted with ``str.encode()``.

        :raises StopIteration: when ``gdal.CPLReadLineL`` returns ``None``
        """
        line = gdal.CPLReadLineL(self._fp)
        if line is None:
            raise StopIteration
        if self._binary:
            return line.encode()
        return line

    def close(self):
        """Close the underlying handle; calling it again is a no-op."""
        if self._closed:
            return

        self._closed = True
        gdal.VSIFCloseL(self._fp)

    def read(self, size=-1):
        """Read up to ``size`` bytes from the current position.

        With the default ``size=-1`` the requested count is the total file
        size, obtained by seeking to the end and back.

        :returns: ``bytes`` in binary mode, otherwise a decoded ``str``
        """
        if size == -1:
            pos = self.tell()
            self.seek(0, 2)
            size = self.tell()
            self.seek(pos)

        raw = gdal.VSIFReadL(1, size, self._fp)

        if self._binary:
            return bytes(raw)
        else:
            return raw.decode(self._encoding)

    def write(self, x):
        """Write ``x`` at the current position.

        :param x: ``bytes``, ``bytearray`` or ``memoryview`` in binary mode,
            ``str`` in text mode (encoded with the configured encoding)
        :raises TypeError: if ``x`` does not have the type required by the mode
        :raises OSError: if fewer bytes than requested were written
        """
        # Explicit checks rather than ``assert`` so that validation is still
        # performed when Python runs with -O; isinstance() also accepts
        # subclasses of the expected types.
        if self._binary:
            if not isinstance(x, (bytes, bytearray, memoryview)):
                raise TypeError(
                    "binary mode requires bytes, bytearray or memoryview, "
                    f"not {type(x).__name__}"
                )
        else:
            if not isinstance(x, str):
                raise TypeError(f"text mode requires str, not {type(x).__name__}")
            x = x.encode(self._encoding)

        planned_write = len(x)
        actual_write = gdal.VSIFWriteL(x, 1, planned_write, self._fp)

        if planned_write != actual_write:
            raise OSError(
                f"Expected to write {planned_write} bytes but {actual_write} were written"
            )

    def seek(self, offset, whence=0):
        """Move the file position with ``gdal.VSIFSeekL``.

        :raises OSError: if ``gdal.VSIFSeekL`` returns a non-zero value
        """
        if gdal.VSIFSeekL(self._fp, offset, whence) != 0:
            raise OSError(gdal.VSIGetLastErrorMsg())

    def tell(self):
        """Return the current file position as given by ``gdal.VSIFTellL``."""
        return gdal.VSIFTellL(self._fp)


def vsi_open(path, mode="r"):
    """Open ``path`` with ``gdal.VSIFile`` and return the resulting file object.

    :param path: path of the file to open
    :param mode: open mode, ``"r"`` by default
    """
    return gdal.VSIFile(path, mode)


def vrt_has_open_support():
Expand Down
1 change: 1 addition & 0 deletions doc/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# This file may be used to create an environment using:
# $ pip install --upgrade -r <this file>
fsspec
numpy
sphinx
breathe
Expand Down
20 changes: 20 additions & 0 deletions doc/source/api/python/general.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,26 @@ Error Handling
File Management
---------------

osgeo.gdal_fsspec module
++++++++++++++++++++++++

.. automodule:: osgeo.gdal_fsspec
:members:
:undoc-members:
:show-inheritance:
:noindex:

osgeo.gdal.VSIFile class
++++++++++++++++++++++++

.. autoclass:: osgeo.gdal.VSIFile
:members:
:undoc-members:
:noindex:

Low level functions
+++++++++++++++++++

.. autofunction:: osgeo.gdal.CloseDir

.. autofunction:: osgeo.gdal.CopyFile
Expand Down
Loading

0 comments on commit 81fb43a

Please sign in to comment.