From d8303e7dd5e777292c7f15e864ebc2af14e8c843 Mon Sep 17 00:00:00 2001 From: Gwo Tzu-Hsing Date: Wed, 2 Oct 2024 13:48:27 +0800 Subject: [PATCH] complete readme --- README.md | 66 ++++++++++++++++++++++++++---------------- examples/src/object.rs | 16 +++++++++- examples/src/s3.rs | 23 ++++++++------- fusio/benches/tokio.rs | 2 +- 4 files changed, 69 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index daf5970..1320ed9 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,63 @@ # Fusio -Fusio provides [Read](https://github.com/tonbo-io/fusio/blob/main/fusio/src/lib.rs#L81) / [Write](https://github.com/tonbo-io/fusio/blob/main/fusio/src/lib.rs#L63) trait to operate multiple storage (local disk, Amazon S3) on multiple poll-based ([tokio](https://github.com/tokio-rs/tokio)) / completion-based async runtime ([tokio-uring](https://github.com/tokio-rs/tokio-uring), [monoio](https://github.com/bytedance/monoio)) with: -- lean: binary size is at least 14x smaller than others -- minimal cost abstraction: compare with bare storage backend, trait definitions promise dispatching file operations without extra costs -- extensible: expose traits to support implementing storage backend as [third-party crates](https://github.com/tonbo-io/fusio/tree/main/fusio-object-store) +Fusio provides [Read](https://github.com/tonbo-io/fusio/blob/main/fusio/src/lib.rs#L81) and [Write](https://github.com/tonbo-io/fusio/blob/main/fusio/src/lib.rs#L63) traits to operate on multiple storage backends (e.g., local disk, Amazon S3) across various asynchronous runtimes—both poll-based ([tokio](https://github.com/tokio-rs/tokio)) and completion-based ([tokio-uring](https://github.com/tokio-rs/tokio-uring), [monoio](https://github.com/bytedance/monoio))—with: + +- Lean: Binary size is at least 14× smaller than others. +- Minimal-cost abstraction: Compared to bare storage backends, trait definitions allow dispatching file operations without extra overhead. 
+- Extensible: Exposes traits to support implementing storage backends as third-party crates. > **Fusio is now at preview version, please join our [community](https://discord.gg/j27XVFVmJM) to attend its development and semantic / behavior discussion.** -## Why need Fusio? +## Why do we need Fusio? -Since we start to integrate object storage in [Tonbo](https://github.com/tonbo-io/tonbo), we need file and file system abstractions to dispatch read and write operations to multiple storage backend: memory, local disk, remote object storage and so on. We found that the exist solution has limitations as below: -- local or remote file system accessing is not able to be usable in kinds of async runtimes (not only completion-based runime, but also Python / JS event loop) -- most of VFS implementations are designed for backend server scenarios. As an embedded database, Tonbo needs a lean implementation for embedded, and also a set of traits, allows to extend asynchronous file / file system approach as third-party crates. +Since we started integrating object storage into [Tonbo](https://github.com/tonbo-io/tonbo), we realized the need for file and file system abstractions to dispatch read and write operations to multiple storage backends: memory, local disk, remote object storage, and so on. We found that existing solutions have the following limitations: +- Accessing local or remote file systems is not usable across various kinds of asynchronous runtimes (not only completion-based runtimes but also Python / JavaScript event loops). +- Most VFS implementations are designed for backend server scenarios. As an embedded database, Tonbo requires a lean implementation suitable for embedding, along with a set of traits that allow extending asynchronous file and file system approaches as third-party crates. For more context, please check [apache/arrow-rs#6051](https://github.com/apache/arrow-rs/issues/6051). ## How to use it? 
-Because it is not possible to make poll-based async runtime compatible with completion-based at runtime, `fusio` supports switch runtime at compile time - ### Installation ```toml fusio = { version = "*", features = ["tokio"] } ``` ### Examples -- -## When choose fusio? +#### [Runtime agnostic](https://github.com/tonbo-io/fusio/blob/main/examples/src/multi_runtime.rs) -Targets of fusio is different with [object_store](https://github.com/apache/arrow-rs/tree/master/object_store) or [opendal](https://github.com/apache/opendal). +`fusio` supports switching the async runtime at compile time. Middleware libraries can build runtime-agnostic implementations, allowing the top-level application to choose the runtime. -### compare with `object_store` +#### [Object safety](https://github.com/tonbo-io/fusio/blob/main/examples/src/object.rs) + +`fusio` provides two sets of traits: +- `Read` / `Write` / `Seek` / `Fs` are not object-safe. +- `DynRead` / `DynWrite` / `DynSeek` / `DynFs` are object-safe. + +You can freely transmute between them. + +#### [File system traits](https://github.com/tonbo-io/fusio/blob/main/examples/src/fs.rs) -`object_store` is locked on [tokio](https://github.com/tokio-rs/tokio) runtime in the current, and also [bytes](https://github.com/apache/arrow-rs/blob/master/object_store/src/payload.rs#L23). `fusio` chooses completion-based like API (inspired by [monoio](https://docs.rs/monoio/latest/monoio/io/trait.AsyncReadRent.html)) to get the minimal cost abstraction in all kinds of async runtimes. +`fusio` has an optional Fs trait (use `default-features = false` to disable it). It dispatches common file system operations (open, remove, list, etc.) to specific storage backends (local disk, Amazon S3). + +#### [S3 support](https://github.com/tonbo-io/fusio/blob/main/examples/src/s3.rs) + +`fusio` has optional Amazon S3 support (enable it with `features = ["tokio-http", "aws"]`); the behavior of S3 operations and credentials does not depend on `tokio`. 
+ +## When to choose fusio? + + Overall, `fusio` carefully selects a subset of semantics and behaviors from multiple storage backends and async runtimes to ensure native performance in most scenarios. For example, `fusio` adopts a completion-based API (inspired by [monoio](https://docs.rs/monoio/latest/monoio/io/trait.AsyncReadRent.html)) so that file operations on `tokio` and `tokio-uring` perform the same as they would without `fusio`. + +### compare with `object_store` -`fusio` also uses [IoBuf](https://github.com/tonbo-io/fusio/blob/main/fusio/src/lib.rs#L53) / [IoBufMut](https://github.com/tonbo-io/fusio/blob/main/fusio/src/lib.rs#L64) to allow `&[u8]`, `Vec<u8>` avoid potential runtime costs. If you are not aware of vendor lock-in, try `object_store`, as the official implementation, it integrates well with `arrow` and `parquet`. +`object_store` is locked to tokio and also depends on `bytes`. `fusio` uses `IoBuf` / `IoBufMut` to allow `&[u8]` and `Vec<u8>` to avoid potential runtime costs. If you do not need to consider other async runtimes, try `object_store`; as the official implementation, it integrates well with arrow and parquet. ### compare with `opendal` -`fusio` does not aim to be a full data access layer like `opendal`. `fusio` is able to enable features and their dependencies on by one. The default binary size of `fusio` is 245KB, which is much more smaller than `opendal` (8.9MB). If you need a full ecosystem of DAL (tracing, cache, etc.) try `opendal`. +`fusio` does not aim to be a full data access layer like `opendal`. `fusio` keeps features lean, and you are able to enable features and their dependencies one by one. The default binary size of `fusio` is 245KB, which is much smaller than `opendal` (8.9MB). If you need a full ecosystem of DAL (tracing, cache, etc.), try opendal. -Also, +Also, compared with `opendal::Operator`, fusio exposes core traits and allows them to be implemented in third-party crates. 
## Roadmap - abstractions @@ -55,9 +71,9 @@ Also, - [x] network - [x] HTTP client trait wi - [x] network storage runtime support - - [x] tokio (base on reqwest) - - [ ] monoio (base on hyper-tls) - - [ ] tokio-uring (base on hyper-tls) + - [x] tokio (over reqwest) + - [ ] monoio (over hyper-tls) + - [ ] tokio-uring (over hyper-tls) - [x] Amazon S3 - [ ] Azure Blob Storage - [ ] Cloudflare R2 @@ -68,7 +84,7 @@ Also, - [x] object_store support ## Credits -- `monoio`: all core traits: buffer, read and write are highly inspire by it -- `futures`: how it designs abstractions and organizes several crates (core, util, etc.) to avoid coupling impact `fusio`'s design -- `opendal`: compile-time poll-based / completion-based runtime switch insipres `fusio` -- `object_store`: `fusio` copies S3 credential and path behaviors from it +- `monoio`: all core traits—buffer, read, and write—are highly inspired by it +- `futures`: its design of abstractions and organization of several crates (core, util, etc.) 
to avoid coupling have influenced `fusio`'s design +- `opendal`: Compile-time poll-based/completion-based runtime switching inspires `fusio` +- `object_store`: `fusio` adopts S3 credential and path behaviors from it diff --git a/examples/src/object.rs b/examples/src/object.rs index e69c3a5..ede9ffc 100644 --- a/examples/src/object.rs +++ b/examples/src/object.rs @@ -1,9 +1,23 @@ use fusio::dynamic::DynFile; use fusio::{Error, IoBuf, IoBufMut, Read, Write}; +#[allow(unused)] +#[cfg(feature = "tokio")] +async fn use_tokio_file() { + use tokio::fs::File; + + let mut file: Box = Box::new(File::open("foo.txt").await.unwrap()); + let write_buf = "hello, world".as_bytes(); + let mut read_buf = [0; 12]; + let (result, _, read_buf) = + object_safe_file_trait(&mut file, write_buf, &mut read_buf[..]).await; + result.unwrap(); + assert_eq!(&read_buf, b"hello, world"); +} + #[allow(unused)] async fn object_safe_file_trait( - mut file: Box, + mut file: &mut Box, write_buf: B, read_buf: BM, ) -> (Result<(), Error>, B, BM) diff --git a/examples/src/s3.rs b/examples/src/s3.rs index ef6584b..d6776a4 100644 --- a/examples/src/s3.rs +++ b/examples/src/s3.rs @@ -12,19 +12,20 @@ async fn use_fs() { let key_id = env::var("AWS_ACCESS_KEY_ID").unwrap(); let secret_key = env::var("AWS_SECRET_ACCESS_KEY").unwrap(); - let s3 = AmazonS3Builder::new("fusio-test".into()) - .credential(AwsCredential { - key_id, - secret_key, - token: None, - }) - .region("ap-southeast-1".into()) - .sign_payload(true) - .build(); + let s3: Arc = Arc::new( + AmazonS3Builder::new("fusio-test".into()) + .credential(AwsCredential { + key_id, + secret_key, + token: None, + }) + .region("ap-southeast-1".into()) + .sign_payload(true) + .build(), + ); - let fs: Arc = Arc::new(s3); let _ = write_without_runtime_awareness( - &mut fs.open(&"foo.txt".into()).await.unwrap(), + &mut s3.open(&"foo.txt".into()).await.unwrap(), "hello, world".as_bytes(), &mut [0; 12][..], ) diff --git a/fusio/benches/tokio.rs 
b/fusio/benches/tokio.rs index 8fddadf..a7f5cc0 100644 --- a/fusio/benches/tokio.rs +++ b/fusio/benches/tokio.rs @@ -5,7 +5,7 @@ use fusio::{ disk::TokioFs, fs::{Fs, OpenOptions}, path::Path, - IoBuf, IoBufMut, Write, + Write, }; use rand::Rng; use tempfile::NamedTempFile;