From fb14c334dd2236acd85823c816f4e9e8466aaf73 Mon Sep 17 00:00:00 2001 From: Olivier Le Doeuff Date: Fri, 17 Jan 2025 11:23:46 +0100 Subject: [PATCH 1/5] doc: Update `annotation-context.rs` to use correct API (#8708) Co-authored-by: Antoine Beyeler Co-authored-by: Clement Rey --- docs/content/concepts/annotation-context.md | 2 +- .../all/tutorials/annotation-context.rs | 56 ------------------- .../all/tutorials/annotation_context.cpp | 38 +++++++++++++ ...ation-context.py => annotation_context.py} | 4 +- .../all/tutorials/annotation_context.rs | 40 +++++++++++++ docs/snippets/snippets.toml | 5 -- 6 files changed, 82 insertions(+), 63 deletions(-) delete mode 100644 docs/snippets/all/tutorials/annotation-context.rs create mode 100644 docs/snippets/all/tutorials/annotation_context.cpp rename docs/snippets/all/tutorials/{annotation-context.py => annotation_context.py} (89%) create mode 100644 docs/snippets/all/tutorials/annotation_context.rs diff --git a/docs/content/concepts/annotation-context.md b/docs/content/concepts/annotation-context.md index cd89d4cee311..a70f22d224d3 100644 --- a/docs/content/concepts/annotation-context.md +++ b/docs/content/concepts/annotation-context.md @@ -56,7 +56,7 @@ Annotation contexts are logged with: * Python: 🐍[`rr.AnnotationContext`](https://ref.rerun.io/docs/python/stable/common/archetypes/#rerun.archetypes.AnnotationContext) * Rust: 🦀[`rerun::AnnotationContext`](https://docs.rs/rerun/latest/rerun/archetypes/struct.AnnotationContext.html#) -snippet: tutorials/annotation-context +snippet: tutorials/annotation_context ## Affected entities diff --git a/docs/snippets/all/tutorials/annotation-context.rs b/docs/snippets/all/tutorials/annotation-context.rs deleted file mode 100644 index fa5bab0a4c96..000000000000 --- a/docs/snippets/all/tutorials/annotation-context.rs +++ /dev/null @@ -1,56 +0,0 @@ -// Annotation context with two classes, using two labeled classes, of which ones defines a color. -MsgSender::new("masks") // Applies to all entities below "masks". - .with_static(true) - .with_component(&[AnnotationContext { - class_map: [ - ClassDescription { - info: AnnotationInfo { - id: 0, - label: Some(Label("Background".into())), - color: None, - }, - ..Default::default() - }, - ClassDescription { - info: AnnotationInfo { - id: 0, - label: Some(Label("Person".into())), - color: Some(Color(0xFF000000)), - }, - ..Default::default() - }, - ] - .into_iter() - .map(|class| (ClassId(class.info.id), class)) - .collect(), - }])? - .send(rec)?; - -// Annotation context with simple keypoints & keypoint connections. -MsgSender::new("detections") // Applies to all entities below "detections". - .with_static(true) - .with_component(&[AnnotationContext { - class_map: std::iter::once(( - ClassId(0), - ClassDescription { - info: AnnotationInfo { - id: 0, - label: Some(Label("Snake".into())), - color: None, - }, - keypoint_map: (0..10) - .map(|i| AnnotationInfo { - id: i, - label: None, - color: Some(Color::from_rgb(0, (255 / 9 * i) as u8, 0)), - }) - .map(|keypoint| (KeypointId(keypoint.id), keypoint)) - .collect(), - keypoint_connections: (0..9) - .map(|i| (KeypointId(i), KeypointId(i + 1))) - .collect(), - }, - )) - .collect(), - }])? - .send(rec)?; diff --git a/docs/snippets/all/tutorials/annotation_context.cpp b/docs/snippets/all/tutorials/annotation_context.cpp new file mode 100644 index 000000000000..d4dde5c3f0ba --- /dev/null +++ b/docs/snippets/all/tutorials/annotation_context.cpp @@ -0,0 +1,38 @@ +#include + +int main() { + const auto rec = rerun::RecordingStream("rerun_example_annotation_context_connections"); + rec.spawn().exit_on_failure(); + + // Annotation context with two classes, using two labeled classes, of which ones defines a + // color. + rec.log_static( + "masks", + rerun::AnnotationContext({ + rerun::AnnotationInfo(0, "Background"), + rerun::AnnotationInfo(1, "Person", rerun::Rgba32(255, 0, 0)), + }) + ); + + // Annotation context with simple keypoints & keypoint connections. + std::vector keypoint_annotations; + for (uint16_t i = 0; i < 10; ++i) { + keypoint_annotations.push_back( + rerun::AnnotationInfo(i, rerun::Rgba32(0, static_cast(28 * i), 0)) + ); + } + + std::vector keypoint_connections; + for (uint16_t i = 0; i < 9; ++i) { + keypoint_connections.push_back(rerun::KeypointPair(i, i + 1)); + } + + rec.log_static( + "detections", // Applies to all entities below "detections". + rerun::AnnotationContext({rerun::ClassDescription( + rerun::AnnotationInfo(0, "Snake"), + keypoint_annotations, + keypoint_connections + )}) + ); +} diff --git a/docs/snippets/all/tutorials/annotation-context.py b/docs/snippets/all/tutorials/annotation_context.py similarity index 89% rename from docs/snippets/all/tutorials/annotation-context.py rename to docs/snippets/all/tutorials/annotation_context.py index 4931b28db951..62c4d6d94472 100644 --- a/docs/snippets/all/tutorials/annotation-context.py +++ b/docs/snippets/all/tutorials/annotation_context.py @@ -1,5 +1,7 @@ import rerun as rr +rr.init("rerun_example_annotation_context_connections") + # Annotation context with two classes, using two labeled classes, of which ones defines a color. rr.log( "masks", # Applies to all entities below "masks". @@ -17,7 +19,7 @@ "detections", # Applies to all entities below "detections". rr.ClassDescription( info=rr.AnnotationInfo(0, label="Snake"), - keypoint_annotations=[rr.AnnotationInfo(id=i, color=(0, 255 / 9 * i, 0)) for i in range(10)], + keypoint_annotations=[rr.AnnotationInfo(id=i, color=(0, 28 * i, 0)) for i in range(10)], keypoint_connections=[(i, i + 1) for i in range(9)], ), static=True, diff --git a/docs/snippets/all/tutorials/annotation_context.rs b/docs/snippets/all/tutorials/annotation_context.rs new file mode 100644 index 000000000000..9c105d574749 --- /dev/null +++ b/docs/snippets/all/tutorials/annotation_context.rs @@ -0,0 +1,40 @@ +use rerun::{ + datatypes::{ClassDescriptionMapElem, KeypointId}, + AnnotationContext, AnnotationInfo, ClassDescription, Rgba32, +}; + +fn main() -> Result<(), Box> { + let rec = rerun::RecordingStreamBuilder::new("rerun_example_annotation_context_connections") + .spawn()?; + + // Annotation context with two classes, using two labeled classes, of which ones defines a + // color. + rec.log_static( + "masks", // Applies to all entities below "masks". + &AnnotationContext::new([ + ClassDescriptionMapElem::from((0, "Background")), + ClassDescriptionMapElem::from((1, "Person", Rgba32::from_rgb(255, 0, 0))), + ]), + )?; + + // Annotation context with simple keypoints & keypoint connections. + rec.log_static( + "detections", // Applies to all entities below "detections". + &AnnotationContext::new([ClassDescription { + info: (0, "Snake").into(), + keypoint_annotations: (0..10) + .map(|i| AnnotationInfo { + id: i, + label: None, + color: Some(Rgba32::from_rgb(0, (28 * i) as u8, 0)), + }) + .collect(), + keypoint_connections: (0..9) + .map(|i| (KeypointId(i), KeypointId(i + 1))) + .map(Into::into) + .collect(), + }]), + )?; + + Ok(()) +} diff --git a/docs/snippets/snippets.toml b/docs/snippets/snippets.toml index 429c138395f6..98df865bfb43 100644 --- a/docs/snippets/snippets.toml +++ b/docs/snippets/snippets.toml @@ -127,11 +127,6 @@ "rust", "py", ] -"tutorials/annotation-context" = [ # Not a complete example - "cpp", - "rust", - "py", -] "tutorials/any_values" = [ # Not yet implemented "cpp", ] From 4db780c76e0d9990cb7118b20f3e4ac68f96aa33 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 17 Jan 2025 11:30:31 +0100 Subject: [PATCH 2/5] Pin ci machines with Vulkan setup to Ubuntu 22.04 (#8721) `ubuntu-latest` got updated to imply Ubuntu 24. This makes our software rasterizer script fail with ``` ERROR: [Loader Message] Code 0 : libLLVM-15.so.1: cannot open shared object file: No such file or directory ERROR: libLLVM-15.so.1: cannot open shared object file: No such file or directory ``` when running vulkaninfo. Surely we can fix this by installing the necessary dependencies, but for now let's just pin this to the previous OS version. --- .github/workflows/contrib_checks.yml | 6 ++++-- .github/workflows/reusable_checks_rust.yml | 3 ++- crates/utils/re_video/src/decode/mod.rs | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/contrib_checks.yml b/.github/workflows/contrib_checks.yml index 44f68ed6d9ab..ddaad0e4dc06 100644 --- a/.github/workflows/contrib_checks.yml +++ b/.github/workflows/contrib_checks.yml @@ -76,7 +76,8 @@ jobs: no-codegen-changes: name: Check if running codegen would produce any changes - runs-on: ubuntu-latest-16-cores + # TODO(andreas): setup-vulkan doesn't work on 24.4 right now due to missing .so + runs-on: ubuntu-22.04-large steps: # Note: We explicitly don't override `ref` here. We need to see if changes would be made # in a context where we have merged with main. Otherwise we might miss changes such as one @@ -92,7 +93,8 @@ jobs: rs-lints: name: Rust lints (fmt, check, clippy, tests, doc) - runs-on: ubuntu-latest-16-cores + # TODO(andreas): setup-vulkan doesn't work on 24.4 right now due to missing .so + runs-on: ubuntu-22.04-16core steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/reusable_checks_rust.yml b/.github/workflows/reusable_checks_rust.yml index 5d42487f45b3..3140ac967af1 100644 --- a/.github/workflows/reusable_checks_rust.yml +++ b/.github/workflows/reusable_checks_rust.yml @@ -52,7 +52,8 @@ jobs: rs-lints: name: Rust lints (fmt, check, clippy, tests, doc) - runs-on: ubuntu-latest-16-cores + # TODO(andreas): setup-vulkan doesn't work on 24.4 right now due to missing .so + runs-on: ubuntu-22.04-large steps: - uses: actions/checkout@v4 with: diff --git a/crates/utils/re_video/src/decode/mod.rs b/crates/utils/re_video/src/decode/mod.rs index b8f4f3db1322..52e092b71d3a 100644 --- a/crates/utils/re_video/src/decode/mod.rs +++ b/crates/utils/re_video/src/decode/mod.rs @@ -337,7 +337,7 @@ pub enum PixelFormat { Yuv { layout: YuvPixelLayout, range: YuvRange, - // TODO(andreas): color primaries should also apply to RGB data, + // TODO(andreas): Color primaries should also apply to RGB data, // but for now we just always assume RGB to be BT.709 ~= sRGB. coefficients: YuvMatrixCoefficients, // Note that we don't handle chroma sample location at all so far. From a0c8c368096acc6c193a43c98decc88f13048ebc Mon Sep 17 00:00:00 2001 From: Antoine Beyeler <49431240+abey79@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:33:43 +0100 Subject: [PATCH 3/5] Fix CI (#8722) --- docs/snippets/INDEX.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/snippets/INDEX.md b/docs/snippets/INDEX.md index ff1d6402e89d..c34be42a0af0 100644 --- a/docs/snippets/INDEX.md +++ b/docs/snippets/INDEX.md @@ -39,11 +39,11 @@ _All snippets, organized by the [`Archetype`](https://rerun.io/docs/reference/ty | Archetype | Snippet | Description | Python | Rust | C++ | | --------- | ------- | ----------- | ------ | ---- | --- | +| **[`AnnotationContext`](https://rerun.io/docs/reference/types/archetypes/annotation_context)** | `tutorials/annotation_context` | | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.cpp) | | **[`AnnotationContext`](https://rerun.io/docs/reference/types/archetypes/annotation_context)** | `archetypes/annotation_context_segmentation` | Log a segmentation image with annotations | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_segmentation.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_segmentation.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_segmentation.cpp) | | **[`AnnotationContext`](https://rerun.io/docs/reference/types/archetypes/annotation_context)** | `archetypes/annotation_context_rects` | | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_rects.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_rects.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_rects.cpp) | | **[`AnnotationContext`](https://rerun.io/docs/reference/types/archetypes/annotation_context)** | `archetypes/annotation_context_connections` | Log annotation context with connections between keypoints | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_connections.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_connections.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_connections.cpp) | | **[`AnnotationContext`](https://rerun.io/docs/reference/types/archetypes/annotation_context)** | `archetypes/segmentation_image_simple` | Create and log a segmentation image | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/segmentation_image_simple.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/segmentation_image_simple.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/segmentation_image_simple.cpp) | -| **[`AnnotationContext`](https://rerun.io/docs/reference/types/archetypes/annotation_context)** | `tutorials/annotation-context` | | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation-context.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation-context.rs) | | | **[`Arrows2D`](https://rerun.io/docs/reference/types/archetypes/arrows2d)** | `archetypes/arrows2d_simple` | Log a batch of 2D arrows | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/arrows2d_simple.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/arrows2d_simple.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/arrows2d_simple.cpp) | | **[`Arrows3D`](https://rerun.io/docs/reference/types/archetypes/arrows3d)** | `archetypes/arrows3d_simple` | Log a batch of 3D arrows | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/arrows3d_simple.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/arrows3d_simple.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/arrows3d_simple.cpp) | | **[`Arrows3D`](https://rerun.io/docs/reference/types/archetypes/arrows3d)** | `archetypes/clear_recursive` | Log and then clear data recursively | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/clear_recursive.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/clear_recursive.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/clear_recursive.cpp) | @@ -196,11 +196,11 @@ _All snippets, organized by the [`Component`](https://rerun.io/docs/reference/ty | Component | Snippet | Description | Python | Rust | C++ | | --------- | ------- | ----------- | ------ | ---- | --- | +| **[`AnnotationContext`](https://rerun.io/docs/reference/types/components/annotation_context)** | `tutorials/annotation_context` | | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.cpp) | | **[`AnnotationContext`](https://rerun.io/docs/reference/types/components/annotation_context)** | `archetypes/annotation_context_segmentation` | Log a segmentation image with annotations | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_segmentation.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_segmentation.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_segmentation.cpp) | | **[`AnnotationContext`](https://rerun.io/docs/reference/types/components/annotation_context)** | `archetypes/annotation_context_rects` | | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_rects.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_rects.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_rects.cpp) | | **[`AnnotationContext`](https://rerun.io/docs/reference/types/components/annotation_context)** | `archetypes/annotation_context_connections` | Log annotation context with connections between keypoints | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_connections.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_connections.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/annotation_context_connections.cpp) | | **[`AnnotationContext`](https://rerun.io/docs/reference/types/components/annotation_context)** | `archetypes/segmentation_image_simple` | Create and log a segmentation image | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/segmentation_image_simple.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/segmentation_image_simple.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/segmentation_image_simple.cpp) | -| **[`AnnotationContext`](https://rerun.io/docs/reference/types/components/annotation_context)** | `tutorials/annotation-context` | | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation-context.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation-context.rs) | | | **[`ClassId`](https://rerun.io/docs/reference/types/components/class_id)** | `archetypes/points3d_partial_updates_legacy` | Demonstrates usage of the new partial updates APIs | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/points3d_partial_updates_legacy.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/points3d_partial_updates_legacy.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/points3d_partial_updates_legacy.cpp) | | **[`Color`](https://rerun.io/docs/reference/types/components/color)** | `archetypes/points3d_partial_updates_legacy` | Demonstrates usage of the new partial updates APIs | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/points3d_partial_updates_legacy.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/points3d_partial_updates_legacy.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/points3d_partial_updates_legacy.cpp) | | **[`Color`](https://rerun.io/docs/reference/types/components/color)** | `archetypes/points3d_send_columns` | Use the `send_columns` API to send several point clouds over time in a single call | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/points3d_send_columns.py) | | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/archetypes/points3d_send_columns.cpp) | @@ -327,7 +327,7 @@ _All snippets, organized by the blueprint-related [`Archetype`](https://rerun.io | Archetype | Snippet | Description | Python | Rust | C++ | | --------- | ------- | ----------- | ------ | ---- | --- | -| **`Background`** | `tutorials/annotation-context` | | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation-context.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation-context.rs) | | +| **`Background`** | `tutorials/annotation_context` | | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.py) | [🦀](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.rs) | [🌊](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/tutorials/annotation_context.cpp) | | **`DataframeQuery`** | `reference/dataframe_view_query` | Query and display the first 10 rows of a recording in a dataframe view | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/reference/dataframe_view_query.py) | | | | **`DataframeQuery`** | `views/dataframe` | Use a blueprint to customize a DataframeView | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/views/dataframe.py) | | | | **`LineGrid3D`** | `views/spatial3d` | Use a blueprint to customize a Spatial3DView | [🐍](https://github.com/rerun-io/rerun/blob/main/docs/snippets/all/views/spatial3d.py) | | | From fdf065f693d42e3287277c1b152aa79853158371 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 17 Jan 2025 11:56:40 +0100 Subject: [PATCH 4/5] Even less arrow2 (#8719) * Part of #3741 Just one small piece at a time --- Cargo.lock | 5 +- crates/store/re_chunk/src/helpers.rs | 54 ++++------- crates/store/re_chunk/src/slice.rs | 36 ++++---- crates/store/re_chunk_store/Cargo.toml | 3 +- crates/store/re_chunk_store/src/dataframe.rs | 88 +++++++----------- .../src/protobuf_conversions.rs | 33 +++---- crates/store/re_chunk_store/src/store.rs | 9 -- crates/store/re_dataframe/src/query.rs | 92 ++++++------------- .../re_protos/proto/rerun/v0/common.proto | 36 +++++--- .../re_protos/proto/rerun/v0/log_msg.proto | 2 +- .../store/re_protos/src/v0/rerun.common.v0.rs | 36 +++++--- .../re_protos/src/v0/rerun.log_msg.v0.rs | 2 +- crates/store/re_query/Cargo.toml | 1 + crates/store/re_query/examples/latest_at.rs | 14 +-- crates/store/re_query/src/latest_at.rs | 28 ++---- rerun_py/Cargo.toml | 1 - rerun_py/src/arrow.rs | 71 +++++++------- rerun_py/src/python_bridge.rs | 4 +- rerun_py/src/video.rs | 8 +- 19 files changed, 206 insertions(+), 317 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b749629b341f..bd18f4c592f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5605,8 +5605,6 @@ dependencies = [ "hashbrown 0.14.5", "num-traits", "rustc_version", - "serde", - "serde_derive", "simdutf8", ] @@ -5748,7 +5746,6 @@ dependencies = [ "re_tracing", "re_types", "re_types_core", - "serde_json", "similar-asserts", "thiserror 1.0.65", "tinyvec", @@ -6234,6 +6231,7 @@ version = "0.22.0-alpha.1+dev" dependencies = [ "ahash", "anyhow", + "arrow", "backtrace", "bytemuck", "criterion", @@ -7418,7 +7416,6 @@ dependencies = [ "pyo3", "pyo3-build-config", "rand", - "re_arrow2", "re_arrow_util", "re_build_info", "re_build_tools", diff --git a/crates/store/re_chunk/src/helpers.rs b/crates/store/re_chunk/src/helpers.rs index 710eba1b6eab..cfeb427c29e1 100644 --- a/crates/store/re_chunk/src/helpers.rs +++ b/crates/store/re_chunk/src/helpers.rs @@ -1,7 +1,7 @@ use std::sync::Arc; -use arrow::array::ArrayRef; -use arrow2::array::Array as Arrow2Array; +use arrow::array::ArrayRef as ArrowArrayRef; +use arrow2::array::Array as _; use re_log_types::{TimeInt, Timeline}; use re_types_core::{Component, ComponentName}; @@ -21,26 +21,13 @@ impl Chunk { &self, component_name: &ComponentName, row_index: usize, - ) -> Option> { - self.component_batch_raw_arrow2(component_name, row_index) - .map(|res| res.map(|array| array.into())) - } - - /// Returns the raw data for the specified component. - /// - /// Returns an error if the row index is out of bounds. - #[inline] - fn component_batch_raw_arrow2( - &self, - component_name: &ComponentName, - row_index: usize, - ) -> Option>> { + ) -> Option> { self.get_first_component(component_name) .and_then(|list_array| { if list_array.len() > row_index { list_array .is_valid(row_index) - .then(|| Ok(list_array.value(row_index))) + .then(|| Ok(list_array.value(row_index).into())) } else { Some(Err(crate::ChunkError::IndexOutOfBounds { kind: "row".to_owned(), @@ -78,7 +65,7 @@ impl Chunk { component_name: &ComponentName, row_index: usize, instance_index: usize, - ) -> Option> { + ) -> Option> { let res = self.component_batch_raw(component_name, row_index)?; let array = match res { @@ -131,7 +118,7 @@ impl Chunk { &self, component_name: &ComponentName, row_index: usize, - ) -> Option> { + ) -> Option> { let res = self.component_batch_raw(component_name, row_index)?; let array = match res { @@ -276,20 +263,11 @@ impl UnitChunkShared { /// Returns the raw data for the specified component. #[inline] - pub fn component_batch_raw(&self, component_name: &ComponentName) -> Option { - self.component_batch_raw_arrow2(component_name) - .map(|array| array.into()) - } - - /// Returns the raw data for the specified component. - #[inline] - pub fn component_batch_raw_arrow2( - &self, - component_name: &ComponentName, - ) -> Option> { + pub fn component_batch_raw(&self, component_name: &ComponentName) -> Option { debug_assert!(self.num_rows() == 1); self.get_first_component(component_name) .and_then(|list_array| list_array.is_valid(0).then(|| list_array.value(0))) + .map(|array| array.into()) } /// Returns the deserialized data for the specified component. @@ -311,10 +289,10 @@ impl UnitChunkShared { &self, component_name: &ComponentName, instance_index: usize, - ) -> Option>> { - let array = self.component_batch_raw_arrow2(component_name)?; + ) -> Option> { + let array = self.component_batch_raw(component_name)?; if array.len() > instance_index { - Some(Ok(array.sliced(instance_index, 1))) + Some(Ok(array.slice(instance_index, 1))) } else { Some(Err(crate::ChunkError::IndexOutOfBounds { kind: "instance".to_owned(), @@ -335,7 +313,7 @@ impl UnitChunkShared { let res = self.component_instance_raw(&C::name(), instance_index)?; let array = match res { - Ok(array) => ArrayRef::from(array), + Ok(array) => array, Err(err) => return Some(Err(err)), }; @@ -354,10 +332,10 @@ impl UnitChunkShared { pub fn component_mono_raw( &self, component_name: &ComponentName, - ) -> Option>> { - let array = self.component_batch_raw_arrow2(component_name)?; + ) -> Option> { + let array = self.component_batch_raw(component_name)?; if array.len() == 1 { - Some(Ok(array.sliced(0, 1))) + Some(Ok(array.slice(0, 1))) } else { Some(Err(crate::ChunkError::IndexOutOfBounds { kind: "mono".to_owned(), @@ -375,7 +353,7 @@ impl UnitChunkShared { let res = self.component_mono_raw(&C::name())?; let array = match res { - Ok(array) => ArrayRef::from(array), + Ok(array) => array, Err(err) => return Some(Err(err)), }; diff --git a/crates/store/re_chunk/src/slice.rs b/crates/store/re_chunk/src/slice.rs index dc233225bf34..6add6bbad8fa 100644 --- a/crates/store/re_chunk/src/slice.rs +++ b/crates/store/re_chunk/src/slice.rs @@ -1,13 +1,11 @@ +use arrow::array::ArrayRef as ArrowArrayRef; use arrow2::array::{ Array as Arrow2Array, BooleanArray as Arrow2BooleanArray, ListArray as Arrow2ListArray, }; - use itertools::Itertools; use nohash_hasher::IntSet; -use re_arrow_util::arrow2_util; -use re_arrow_util::arrow_util; -use re_arrow_util::Arrow2ArrayDowncastRef as _; +use re_arrow_util::{arrow2_util, arrow_util, Arrow2ArrayDowncastRef as _}; use re_log_types::Timeline; use re_types_core::{ComponentDescriptor, ComponentName}; @@ -28,7 +26,7 @@ impl Chunk { &self, row_id: RowId, component_desc: &ComponentDescriptor, - ) -> Option> { + ) -> Option { let list_array = self .components .get(&component_desc.component_name) @@ -51,11 +49,15 @@ impl Chunk { let found_it = times.get(index) == Some(&row_id_time_ns) && incs.get(index) == Some(&row_id_inc); - (found_it && list_array.is_valid(index)).then(|| list_array.value(index)) + (found_it && list_array.is_valid(index)).then(|| list_array.value(index).into()) } else { self.row_ids() .find_position(|id| *id == row_id) - .and_then(|(index, _)| list_array.is_valid(index).then(|| list_array.value(index))) + .and_then(|(index, _)| { + list_array + .is_valid(index) + .then(|| list_array.value(index).into()) + }) } } @@ -1002,8 +1004,8 @@ mod tests { assert!(!chunk.is_sorted()); for (row_id, component_desc, expected) in expectations { - let expected = expected - .and_then(|expected| re_types_core::LoggableBatch::to_arrow2(expected).ok()); + let expected = + expected.and_then(|expected| re_types_core::LoggableBatch::to_arrow(expected).ok()); eprintln!("{component_desc} @ {row_id}"); similar_asserts::assert_eq!(expected, chunk.cell(*row_id, component_desc)); } @@ -1012,8 +1014,8 @@ mod tests { assert!(chunk.is_sorted()); for (row_id, component_desc, expected) in expectations { - let expected = expected - .and_then(|expected| re_types_core::LoggableBatch::to_arrow2(expected).ok()); + let expected = + expected.and_then(|expected| re_types_core::LoggableBatch::to_arrow(expected).ok()); eprintln!("{component_desc} @ {row_id}"); similar_asserts::assert_eq!(expected, chunk.cell(*row_id, component_desc)); } @@ -1131,7 +1133,7 @@ mod tests { for (row_id, component_desc, expected) in expectations { let expected = expected - .and_then(|expected| re_types_core::LoggableBatch::to_arrow2(expected).ok()); + .and_then(|expected| re_types_core::LoggableBatch::to_arrow(expected).ok()); eprintln!("{component_desc} @ {row_id}"); similar_asserts::assert_eq!(expected, chunk.cell(*row_id, component_desc)); } @@ -1162,7 +1164,7 @@ mod tests { for (row_id, component_desc, expected) in expectations { let expected = expected - .and_then(|expected| re_types_core::LoggableBatch::to_arrow2(expected).ok()); + .and_then(|expected| re_types_core::LoggableBatch::to_arrow(expected).ok()); eprintln!("{component_desc} @ {row_id}"); similar_asserts::assert_eq!(expected, chunk.cell(*row_id, component_desc)); } @@ -1258,7 +1260,7 @@ mod tests { for (row_id, component_name, expected) in expectations { let expected = expected - .and_then(|expected| re_types_core::LoggableBatch::to_arrow2(expected).ok()); + .and_then(|expected| re_types_core::LoggableBatch::to_arrow(expected).ok()); eprintln!("{component_name} @ {row_id}"); similar_asserts::assert_eq!(expected, chunk.cell(*row_id, component_name)); } @@ -1277,7 +1279,7 @@ mod tests { for (row_id, component_name, expected) in expectations { let expected = expected - .and_then(|expected| re_types_core::LoggableBatch::to_arrow2(expected).ok()); + .and_then(|expected| re_types_core::LoggableBatch::to_arrow(expected).ok()); eprintln!("{component_name} @ {row_id}"); similar_asserts::assert_eq!(expected, chunk.cell(*row_id, component_name)); } @@ -1404,7 +1406,7 @@ mod tests { for (row_id, component_name, expected) in expectations { let expected = expected - .and_then(|expected| re_types_core::LoggableBatch::to_arrow2(expected).ok()); + .and_then(|expected| re_types_core::LoggableBatch::to_arrow(expected).ok()); eprintln!("{component_name} @ {row_id}"); similar_asserts::assert_eq!(expected, chunk.cell(*row_id, component_name)); } @@ -1551,7 +1553,7 @@ mod tests { for (row_id, component_name, expected) in expectations { let expected = expected - .and_then(|expected| re_types_core::LoggableBatch::to_arrow2(expected).ok()); + .and_then(|expected| re_types_core::LoggableBatch::to_arrow(expected).ok()); eprintln!("{component_name} @ {row_id}"); similar_asserts::assert_eq!(expected, chunk.cell(*row_id, component_name)); } diff --git a/crates/store/re_chunk_store/Cargo.toml b/crates/store/re_chunk_store/Cargo.toml index eb9a6c598e3a..bb712e49da6a 100644 --- a/crates/store/re_chunk_store/Cargo.toml +++ b/crates/store/re_chunk_store/Cargo.toml @@ -43,14 +43,13 @@ re_types_core.workspace = true ahash.workspace = true anyhow.workspace = true arrow.workspace = true -arrow2 = { workspace = true, features = ["compute_concatenate", "serde_types"] } +arrow2 = { workspace = true, features = ["compute_concatenate"] } document-features.workspace = true indent.workspace = true itertools.workspace = true nohash-hasher.workspace = true once_cell.workspace = true parking_lot = { workspace = true, features = ["arc_lock"] } -serde_json.workspace = true thiserror.workspace = true web-time.workspace = true diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs index 3ff35d366dca..d1b9bd183b94 100644 --- a/crates/store/re_chunk_store/src/dataframe.rs +++ b/crates/store/re_chunk_store/src/dataframe.rs @@ -1,13 +1,13 @@ //! All the APIs used specifically for `re_dataframe`. -use std::collections::{BTreeMap, BTreeSet}; -use std::ops::Deref; -use std::ops::DerefMut; +use std::{ + collections::{BTreeMap, BTreeSet}, + ops::{Deref, DerefMut}, +}; -use arrow::datatypes::Field as ArrowField; -use arrow2::{ +use arrow::{ array::ListArray as ArrowListArray, - datatypes::{DataType as Arrow2Datatype, Field as Arrow2Field}, + datatypes::{DataType as ArrowDatatype, Field as ArrowField}, }; use itertools::Itertools; @@ -44,7 +44,7 @@ impl ColumnDescriptor { } #[inline] - pub fn arrow2_datatype(&self) -> Arrow2Datatype { + pub fn arrow_datatype(&self) -> ArrowDatatype { match self { Self::Time(descr) => descr.datatype.clone(), Self::Component(descr) => descr.returned_datatype(), @@ -59,14 +59,6 @@ impl ColumnDescriptor { } } - #[inline] - pub fn to_arrow2_field(&self) -> Arrow2Field { - match self { - Self::Time(descr) => descr.to_arrow2_field(), - Self::Component(descr) => descr.to_arrow2_field(), - } - } - #[inline] pub fn short_name(&self) -> String { match self { @@ -91,7 +83,7 @@ pub struct TimeColumnDescriptor { pub timeline: Timeline, /// The Arrow datatype of the column. - pub datatype: Arrow2Datatype, + pub datatype: ArrowDatatype, } impl PartialOrd for TimeColumnDescriptor { @@ -120,7 +112,7 @@ impl TimeColumnDescriptor { // TODO(cmc): I picked a sequence here because I have to pick something. // It doesn't matter, only the name will remain in the Arrow schema anyhow. timeline: Timeline::new_sequence(name), - datatype: Arrow2Datatype::Null, + datatype: ArrowDatatype::Null, } } @@ -140,35 +132,25 @@ impl TimeColumnDescriptor { } #[inline] - pub fn datatype(&self) -> &Arrow2Datatype { + pub fn datatype(&self) -> &ArrowDatatype { &self.datatype } - fn metadata(&self) -> arrow2::datatypes::Metadata { - let Self { - timeline, - datatype: _, - } = self; + #[inline] + pub fn to_arrow_field(&self) -> ArrowField { + let Self { timeline, datatype } = self; - std::iter::once(Some(( + let nullable = true; // Time column must be nullable since static data doesn't have a time. + + let metadata = std::iter::once(Some(( "sorbet.index_name".to_owned(), timeline.name().to_string(), ))) .flatten() - .collect() - } + .collect(); - #[inline] - pub fn to_arrow_field(&self) -> ArrowField { - self.to_arrow2_field().into() - } - - #[inline] - pub fn to_arrow2_field(&self) -> Arrow2Field { - let Self { timeline, datatype } = self; - let nullable = true; // Time column must be nullable since static data doesn't have a time. - Arrow2Field::new(timeline.name().to_string(), datatype.clone(), nullable) - .with_metadata(self.metadata()) + ArrowField::new(timeline.name().to_string(), datatype.clone(), nullable) + .with_metadata(metadata) } } @@ -208,7 +190,7 @@ pub struct ComponentColumnDescriptor { /// This is the log-time datatype corresponding to how this data is encoded /// in a chunk. Currently this will always be an [`ArrowListArray`], but as /// we introduce mono-type optimization, this might be a native type instead. - pub store_datatype: Arrow2Datatype, + pub store_datatype: ArrowDatatype, /// Whether this column represents static data. pub is_static: bool, @@ -320,7 +302,7 @@ impl ComponentColumnDescriptor { &self.entity_path == entity_path && self.component_name.matches(component_name) } - fn metadata(&self) -> arrow2::datatypes::Metadata { + fn metadata(&self) -> std::collections::HashMap { let Self { entity_path, archetype_name, @@ -360,17 +342,12 @@ impl ComponentColumnDescriptor { } #[inline] - pub fn returned_datatype(&self) -> Arrow2Datatype { + pub fn returned_datatype(&self) -> ArrowDatatype { self.store_datatype.clone() } #[inline] pub fn to_arrow_field(&self) -> ArrowField { - self.to_arrow2_field().into() - } - - #[inline] - pub fn to_arrow2_field(&self) -> Arrow2Field { let entity_path = &self.entity_path; let descriptor = ComponentDescriptor { archetype_name: self.archetype_name, @@ -378,7 +355,7 @@ impl ComponentColumnDescriptor { component_name: self.component_name, }; - Arrow2Field::new( + ArrowField::new( // NOTE: Uncomment this to expose fully-qualified names in the Dataframe APIs! // I'm not doing that right now, to avoid breaking changes (and we need to talk about // what the syntax for these fully-qualified paths need to look like first). @@ -754,7 +731,7 @@ impl ChunkStore { let timelines = self.all_timelines_sorted().into_iter().map(|timeline| { ColumnDescriptor::Time(TimeColumnDescriptor { timeline, - datatype: timeline.datatype().into(), + datatype: timeline.datatype(), }) }); @@ -769,7 +746,7 @@ impl ChunkStore { .filter_map(|(entity_path, component_descr)| { let metadata = self.lookup_column_metadata(entity_path, &component_descr.component_name)?; - let datatype = self.lookup_datatype_arrow2(&component_descr.component_name)?; + let datatype = self.lookup_datatype(&component_descr.component_name)?; Some(((entity_path, component_descr), (metadata, datatype))) }) @@ -789,7 +766,9 @@ impl ChunkStore { // NOTE: The data is always a at least a list, whether it's latest-at or range. // It might be wrapped further in e.g. a dict, but at the very least // it's a list. - store_datatype: ArrowListArray::::default_datatype(datatype.clone()), + store_datatype: ArrowListArray::DATA_TYPE_CONSTRUCTOR( + ArrowField::new("item", datatype.clone(), true).into(), + ), is_static, is_indicator, is_tombstone, @@ -828,7 +807,7 @@ impl ChunkStore { TimeColumnDescriptor { timeline, - datatype: timeline.datatype().into(), + datatype: timeline.datatype(), } } @@ -879,16 +858,17 @@ impl ChunkStore { }); let datatype = self - .lookup_datatype_arrow2(&component_name) - .cloned() - .unwrap_or(Arrow2Datatype::Null); + .lookup_datatype(&component_name) + .unwrap_or(ArrowDatatype::Null); ComponentColumnDescriptor { entity_path: selector.entity_path.clone(), archetype_name: component_descr.and_then(|descr| descr.archetype_name), archetype_field_name: component_descr.and_then(|descr| descr.archetype_field_name), component_name, - store_datatype: ArrowListArray::::default_datatype(datatype.clone()), + store_datatype: ArrowListArray::DATA_TYPE_CONSTRUCTOR( + ArrowField::new("item", datatype, true).into(), + ), is_static, is_indicator, is_tombstone, diff --git a/crates/store/re_chunk_store/src/protobuf_conversions.rs b/crates/store/re_chunk_store/src/protobuf_conversions.rs index 020cc3523106..1cf1ec3cd25e 100644 --- a/crates/store/re_chunk_store/src/protobuf_conversions.rs +++ b/crates/store/re_chunk_store/src/protobuf_conversions.rs @@ -270,9 +270,7 @@ impl TryFrom for re_protos::common::v0::ColumnDescripto timeline: Some(re_protos::common::v0::Timeline { name: time_descriptor.timeline.name().to_string(), }), - datatype: serde_json::to_string(&time_descriptor.datatype).map_err( - |err| invalid_field!(Self, "time column descriptor", err), - )?, + datatype: time_descriptor.datatype.to_string(), // TODO(emilk): use arrow IPC instead }, ), ), @@ -289,10 +287,7 @@ impl TryFrom for re_protos::common::v0::ColumnDescripto .archetype_field_name .map(|afn| afn.to_string()), component_name: component_descriptor.component_name.to_string(), - datatype: serde_json::to_string(&component_descriptor.store_datatype) - .map_err(|err| { - invalid_field!(Self, "component column descriptor", err) - })?, + datatype: component_descriptor.store_datatype.to_string(), // TODO(emilk): use arrow IPC instead is_static: component_descriptor.is_static, is_tombstone: component_descriptor.is_tombstone, is_semantically_empty: component_descriptor.is_semantically_empty, @@ -325,7 +320,7 @@ impl TryFrom for crate::ColumnDescripto "timeline", ))? .into(), - datatype: serde_json::from_str(&time_descriptor.datatype).map_err(|err| { + datatype: time_descriptor.datatype.parse().map_err(|err| { invalid_field!( re_protos::common::v0::ColumnDescriptor, "time column descriptor", @@ -346,15 +341,13 @@ impl TryFrom for crate::ColumnDescripto archetype_name: component_descriptor.archetype_name.map(Into::into), archetype_field_name: component_descriptor.archetype_field_name.map(Into::into), component_name: component_descriptor.component_name.into(), - store_datatype: serde_json::from_str(&component_descriptor.datatype).map_err( - |err| { - invalid_field!( - re_protos::common::v0::ColumnDescriptor, - "component column descriptor", - err - ) - }, - )?, + store_datatype: component_descriptor.datatype.parse().map_err(|err| { + invalid_field!( + re_protos::common::v0::ColumnDescriptor, + "component column descriptor", + err + ) + })?, is_static: component_descriptor.is_static, is_tombstone: component_descriptor.is_tombstone, is_semantically_empty: component_descriptor.is_semantically_empty, @@ -447,8 +440,8 @@ mod tests { fn test_time_column_descriptor_conversion() { let time_descriptor = crate::TimeColumnDescriptor { timeline: crate::Timeline::log_time(), - datatype: arrow2::datatypes::DataType::Timestamp( - arrow2::datatypes::TimeUnit::Nanosecond, + datatype: arrow::datatypes::DataType::Timestamp( + arrow::datatypes::TimeUnit::Nanosecond, None, ), }; @@ -472,7 +465,7 @@ mod tests { archetype_name: Some("archetype".to_owned().into()), archetype_field_name: Some("field".to_owned().into()), component_name: re_chunk::ComponentName::new("component"), - store_datatype: arrow2::datatypes::DataType::Int64, + store_datatype: arrow::datatypes::DataType::Int64, is_static: true, is_tombstone: false, is_semantically_empty: false, diff --git a/crates/store/re_chunk_store/src/store.rs b/crates/store/re_chunk_store/src/store.rs index a5afbfa9f7a9..36d67a4a8702 100644 --- a/crates/store/re_chunk_store/src/store.rs +++ b/crates/store/re_chunk_store/src/store.rs @@ -646,15 +646,6 @@ impl ChunkStore { .map(|dt| dt.clone().into()) } - /// Lookup the _latest_ arrow [`Arrow2DataType`] used by a specific [`re_types_core::Component`]. - #[inline] - pub fn lookup_datatype_arrow2( - &self, - component_name: &ComponentName, - ) -> Option<&Arrow2DataType> { - self.type_registry.get(component_name) - } - /// Lookup the [`ColumnMetadata`] for a specific [`EntityPath`] and [`re_types_core::Component`]. pub fn lookup_column_metadata( &self, diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs index cd302a51709f..6f0c4947f6c0 100644 --- a/crates/store/re_dataframe/src/query.rs +++ b/crates/store/re_dataframe/src/query.rs @@ -7,9 +7,12 @@ use std::{ }; use arrow::{ - array::RecordBatch as ArrowRecordBatch, buffer::ScalarBuffer as ArrowScalarBuffer, - datatypes::Fields as ArrowFields, datatypes::Schema as ArrowSchema, - datatypes::SchemaRef as ArrowSchemaRef, + array::{ArrayRef as ArrowArrayRef, RecordBatch as ArrowRecordBatch}, + buffer::ScalarBuffer as ArrowScalarBuffer, + datatypes::{ + DataType as ArrowDataType, Fields as ArrowFields, Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, + }, }; use arrow2::{ array::{ @@ -242,13 +245,10 @@ impl QueryHandle { if let Some(clear_chunks) = clear_chunks.get(&descr.entity_path) { chunks.extend(clear_chunks.iter().map(|chunk| { let child_datatype = match &descr.store_datatype { - arrow2::datatypes::DataType::List(field) - | arrow2::datatypes::DataType::LargeList(field) => { + ArrowDataType::List(field) | ArrowDataType::LargeList(field) => { field.data_type().clone() } - arrow2::datatypes::DataType::Dictionary(_, datatype, _) => { - (**datatype).clone() - } + ArrowDataType::Dictionary(_, datatype) => (**datatype).clone(), datatype => datatype.clone(), }; @@ -256,14 +256,14 @@ impl QueryHandle { // Only way this could fail is if the number of rows did not match. #[allow(clippy::unwrap_used)] chunk - .add_component_arrow2( + .add_component( re_types_core::ComponentDescriptor { component_name: descr.component_name, archetype_name: descr.archetype_name, archetype_field_name: descr.archetype_field_name, }, - re_arrow_util::arrow2_util::new_list_array_of_empties( - child_datatype, + re_arrow_util::arrow_util::new_list_array_of_empties( + &child_datatype, chunk.num_rows(), ), ) @@ -429,7 +429,7 @@ impl QueryHandle { component_name: ComponentName::from( selected_component_name.clone(), ), - store_datatype: arrow2::datatypes::DataType::Null, + store_datatype: ArrowDataType::Null, is_static: false, is_indicator: false, is_tombstone: false, @@ -794,39 +794,7 @@ impl QueryHandle { #[inline] pub fn next_row(&self) -> Option> { self.engine - .with(|store, cache| self._next_row_arrow2(store, cache)) - .map(|vec| vec.into_iter().map(|a| a.into()).collect()) - } - - /// Returns the next row's worth of data. - /// - /// The returned vector of Arrow arrays strictly follows the schema specified by [`Self::schema`]. - /// Columns that do not yield any data will still be present in the results, filled with null values. - /// - /// Each cell in the result corresponds to the latest _locally_ known value at that particular point in - /// the index, for each respective `ColumnDescriptor`. - /// See [`QueryExpression::sparse_fill_strategy`] to go beyond local resolution. - /// - /// Example: - /// ```ignore - /// while let Some(row) = query_handle.next_row() { - /// // … - /// } - /// ``` - /// - /// ## Pagination - /// - /// Use [`Self::seek_to_row`]: - /// ```ignore - /// query_handle.seek_to_row(42); - /// for row in query_handle.into_iter().take(len) { - /// // … - /// } - /// ``` - #[inline] - fn next_row_arrow2(&self) -> Option>> { - self.engine - .with(|store, cache| self._next_row_arrow2(store, cache)) + .with(|store, cache| self._next_row(store, cache)) } /// Asynchronously returns the next row's worth of data. @@ -845,15 +813,13 @@ impl QueryHandle { /// } /// ``` #[cfg(not(target_arch = "wasm32"))] - pub fn next_row_async_arrow2( - &self, - ) -> impl std::future::Future>>> + pub fn next_row_async(&self) -> impl std::future::Future>> where E: 'static + Send + Clone, { let res: Option> = self .engine - .try_with(|store, cache| self._next_row_arrow2(store, cache)); + .try_with(|store, cache| self._next_row(store, cache)); let engine = self.engine.clone(); std::future::poll_fn(move |cx| { @@ -883,11 +849,7 @@ impl QueryHandle { }) } - pub fn _next_row_arrow2( - &self, - store: &ChunkStore, - cache: &QueryCache, - ) -> Option>> { + pub fn _next_row(&self, store: &ChunkStore, cache: &QueryCache) -> Option> { re_tracing::profile_function!(); /// Temporary state used to resolve the streaming join for the current iteration. @@ -1240,10 +1202,8 @@ impl QueryHandle { .map(|(view_idx, column)| match column { ColumnDescriptor::Time(descr) => { max_value_per_index.get(&descr.timeline()).map_or_else( - || arrow2::array::new_null_array(column.arrow2_datatype(), 1), - |(_time, time_sliced)| { - descr.typ().make_arrow_array(time_sliced.clone()).into() - }, + || arrow::array::new_null_array(&column.arrow_datatype(), 1), + |(_time, time_sliced)| descr.typ().make_arrow_array(time_sliced.clone()), ) } @@ -1251,7 +1211,8 @@ impl QueryHandle { .get(*view_idx) .cloned() .flatten() - .unwrap_or_else(|| arrow2::array::new_null_array(column.arrow2_datatype(), 1)), + .map(|a| a.into()) + .unwrap_or_else(|| arrow::array::new_null_array(&column.arrow_datatype(), 1)), }) .collect_vec(); @@ -1278,28 +1239,27 @@ impl QueryHandle { where E: 'static + Send + Clone, { - let row = self.next_row_async_arrow2().await?; + let row = self.next_row_async().await?; // If we managed to get a row, then the state must be initialized already. #[allow(clippy::unwrap_used)] let schema = self.state.get().unwrap().arrow_schema.clone(); - // TODO(#3741): remove the collect - ArrowRecordBatch::try_new(schema, row.into_iter().map(|a| a.into()).collect()).ok() + ArrowRecordBatch::try_new(schema, row).ok() } } impl QueryHandle { /// Returns an iterator backed by [`Self::next_row`]. #[allow(clippy::should_implement_trait)] // we need an anonymous closure, this won't work - pub fn iter(&self) -> impl Iterator>> + '_ { - std::iter::from_fn(move || self.next_row_arrow2()) + pub fn iter(&self) -> impl Iterator> + '_ { + std::iter::from_fn(move || self.next_row()) } /// Returns an iterator backed by [`Self::next_row`]. #[allow(clippy::should_implement_trait)] // we need an anonymous closure, this won't work - pub fn into_iter(self) -> impl Iterator>> { - std::iter::from_fn(move || self.next_row_arrow2()) + pub fn into_iter(self) -> impl Iterator> { + std::iter::from_fn(move || self.next_row()) } /// Returns an iterator backed by [`Self::next_row_batch`]. diff --git a/crates/store/re_protos/proto/rerun/v0/common.proto b/crates/store/re_protos/proto/rerun/v0/common.proto index 3d6521e1ecf5..e435ae86b431 100644 --- a/crates/store/re_protos/proto/rerun/v0/common.proto +++ b/crates/store/re_protos/proto/rerun/v0/common.proto @@ -92,10 +92,10 @@ message Query { // Only rows where this column contains non-null data be kept in the final dataset. ComponentColumnSelector filtered_is_not_null = 9; - /// The specific _columns_ to sample from the final view contents. - /// The order of the samples will be respected in the final result. + // The specific _columns_ to sample from the final view contents. + // The order of the samples will be respected in the final result. /// - /// If unspecified, it means - everything. + // If unspecified, it means - everything. ColumnSelection column_selection = 10; // Specifies how null values should be filled in the returned dataframe. @@ -110,31 +110,37 @@ message ColumnDescriptor { } message TimeColumnDescriptor { - /// The timeline this column is associated with. + // The timeline this column is associated with. Timeline timeline = 1; - /// The Arrow datatype of the column. + // The Arrow datatype of the column. + /// + // Currently this is just the `Display` of the `arrow-rs` `DataType`. + // TODO(emilk): use arrow IPC instead. string datatype = 2; } -/// Describes a data/component column, such as `Position3D`. +// Describes a data/component column, such as `Position3D`. message ComponentColumnDescriptor { - /// The path of the entity. + // The path of the entity. EntityPath entity_path = 1; - /// Optional name of the `Archetype` associated with this data. + // Optional name of the `Archetype` associated with this data. optional string archetype_name = 2; - /// Optional name of the field within `Archetype` associated with this data. + // Optional name of the field within `Archetype` associated with this data. optional string archetype_field_name = 3; - /// Semantic name associated with this data. + // Semantic name associated with this data. string component_name = 4; - /// The Arrow datatype of the column. + // The Arrow datatype of the column. + /// + // Currently this is just the `Display` of the `arrow-rs` `DataType`. + // TODO(emilk): use arrow IPC instead. string datatype = 5; - /// Whether the column is a static column. + // Whether the column is a static column. bool is_static = 6; - /// Whether the column is a tombstone column. + // Whether the column is a tombstone column. bool is_tombstone = 7; - /// Whether the column is an indicator column. + // Whether the column is an indicator column. bool is_indicator = 8; - /// Whether the column is semantically empty. + // Whether the column is semantically empty. bool is_semantically_empty = 9; } diff --git a/crates/store/re_protos/proto/rerun/v0/log_msg.proto b/crates/store/re_protos/proto/rerun/v0/log_msg.proto index fef64bcc3dcb..607c946c718e 100644 --- a/crates/store/re_protos/proto/rerun/v0/log_msg.proto +++ b/crates/store/re_protos/proto/rerun/v0/log_msg.proto @@ -86,7 +86,7 @@ message StoreInfo { // Unique ID of the recording. rerun.common.v0.StoreId store_id = 2; - /// True if the recording is one of the official Rerun examples. + // True if the recording is one of the official Rerun examples. bool is_official_example = 3; // When the recording started. diff --git a/crates/store/re_protos/src/v0/rerun.common.v0.rs b/crates/store/re_protos/src/v0/rerun.common.v0.rs index eabb6cc3118e..00467a9af6f1 100644 --- a/crates/store/re_protos/src/v0/rerun.common.v0.rs +++ b/crates/store/re_protos/src/v0/rerun.common.v0.rs @@ -140,10 +140,10 @@ pub struct Query { /// Only rows where this column contains non-null data be kept in the final dataset. #[prost(message, optional, tag = "9")] pub filtered_is_not_null: ::core::option::Option, - /// / The specific _columns_ to sample from the final view contents. - /// / The order of the samples will be respected in the final result. + /// The specific _columns_ to sample from the final view contents. + /// The order of the samples will be respected in the final result. /// / - /// / If unspecified, it means - everything. + /// If unspecified, it means - everything. #[prost(message, optional, tag = "10")] pub column_selection: ::core::option::Option, /// Specifies how null values should be filled in the returned dataframe. @@ -187,10 +187,13 @@ impl ::prost::Name for ColumnDescriptor { } #[derive(Clone, PartialEq, ::prost::Message)] pub struct TimeColumnDescriptor { - /// / The timeline this column is associated with. + /// The timeline this column is associated with. #[prost(message, optional, tag = "1")] pub timeline: ::core::option::Option, - /// / The Arrow datatype of the column. + /// The Arrow datatype of the column. + /// / + /// Currently this is just the `Display` of the `arrow-rs` `DataType`. + /// TODO(emilk): use arrow IPC instead. #[prost(string, tag = "2")] pub datatype: ::prost::alloc::string::String, } @@ -204,34 +207,37 @@ impl ::prost::Name for TimeColumnDescriptor { "/rerun.common.v0.TimeColumnDescriptor".into() } } -/// / Describes a data/component column, such as `Position3D`. +/// Describes a data/component column, such as `Position3D`. #[derive(Clone, PartialEq, ::prost::Message)] pub struct ComponentColumnDescriptor { - /// / The path of the entity. + /// The path of the entity. #[prost(message, optional, tag = "1")] pub entity_path: ::core::option::Option, - /// / Optional name of the `Archetype` associated with this data. + /// Optional name of the `Archetype` associated with this data. #[prost(string, optional, tag = "2")] pub archetype_name: ::core::option::Option<::prost::alloc::string::String>, - /// / Optional name of the field within `Archetype` associated with this data. + /// Optional name of the field within `Archetype` associated with this data. #[prost(string, optional, tag = "3")] pub archetype_field_name: ::core::option::Option<::prost::alloc::string::String>, - /// / Semantic name associated with this data. + /// Semantic name associated with this data. #[prost(string, tag = "4")] pub component_name: ::prost::alloc::string::String, - /// / The Arrow datatype of the column. + /// The Arrow datatype of the column. + /// / + /// Currently this is just the `Display` of the `arrow-rs` `DataType`. + /// TODO(emilk): use arrow IPC instead. #[prost(string, tag = "5")] pub datatype: ::prost::alloc::string::String, - /// / Whether the column is a static column. + /// Whether the column is a static column. #[prost(bool, tag = "6")] pub is_static: bool, - /// / Whether the column is a tombstone column. + /// Whether the column is a tombstone column. #[prost(bool, tag = "7")] pub is_tombstone: bool, - /// / Whether the column is an indicator column. + /// Whether the column is an indicator column. #[prost(bool, tag = "8")] pub is_indicator: bool, - /// / Whether the column is semantically empty. + /// Whether the column is semantically empty. #[prost(bool, tag = "9")] pub is_semantically_empty: bool, } diff --git a/crates/store/re_protos/src/v0/rerun.log_msg.v0.rs b/crates/store/re_protos/src/v0/rerun.log_msg.v0.rs index 88dcae5534a3..c7df324f1cfc 100644 --- a/crates/store/re_protos/src/v0/rerun.log_msg.v0.rs +++ b/crates/store/re_protos/src/v0/rerun.log_msg.v0.rs @@ -114,7 +114,7 @@ pub struct StoreInfo { /// Unique ID of the recording. #[prost(message, optional, tag = "2")] pub store_id: ::core::option::Option, - /// / True if the recording is one of the official Rerun examples. + /// True if the recording is one of the official Rerun examples. #[prost(bool, tag = "3")] pub is_official_example: bool, /// When the recording started. diff --git a/crates/store/re_query/Cargo.toml b/crates/store/re_query/Cargo.toml index c41b0c74bb32..ef481d48214e 100644 --- a/crates/store/re_query/Cargo.toml +++ b/crates/store/re_query/Cargo.toml @@ -42,6 +42,7 @@ re_types_core.workspace = true # External dependencies: ahash.workspace = true anyhow.workspace = true +arrow.workspace = true arrow2.workspace = true backtrace.workspace = true indent.workspace = true diff --git a/crates/store/re_query/examples/latest_at.rs b/crates/store/re_query/examples/latest_at.rs index 749858f089d5..0991ac6b8884 100644 --- a/crates/store/re_query/examples/latest_at.rs +++ b/crates/store/re_query/examples/latest_at.rs @@ -1,10 +1,10 @@ use std::sync::Arc; use anyhow::Context; -use arrow2::array::PrimitiveArray as Arrow2PrimitiveArray; +use arrow::array::UInt32Array as ArrowUInt32Array; use itertools::Itertools; -use re_arrow_util::Arrow2ArrayDowncastRef as _; +use re_arrow_util::ArrowArrayDowncastRef as _; use re_chunk::{Chunk, RowId}; use re_chunk_store::{ChunkStore, ChunkStoreHandle, LatestAtQuery}; use re_log_types::example_components::{MyColor, MyLabel, MyPoint, MyPoints}; @@ -75,16 +75,12 @@ fn main() -> anyhow::Result<()> { // data directly: let colors = colors .context("missing")? - .component_batch_raw_arrow2(&MyColor::name()) + .component_batch_raw(&MyColor::name()) .context("invalid")?; let colors = colors - .downcast_array2_ref::>() + .downcast_array_ref::() .context("invalid")?; - let colors = colors - .values() - .as_slice() - .iter() - .map(|&color| MyColor(color)); + let colors = colors.values().iter().map(|&color| MyColor(color)); // And finally apply your instance-level joining logic, if any: let color_default_fn = || MyColor(0xFF00FFFF); diff --git a/crates/store/re_query/src/latest_at.rs b/crates/store/re_query/src/latest_at.rs index 922cd3a648b1..cba551d4d681 100644 --- a/crates/store/re_query/src/latest_at.rs +++ b/crates/store/re_query/src/latest_at.rs @@ -4,7 +4,7 @@ use std::{ sync::Arc, }; -use arrow2::array::Array as Arrow2Array; +use arrow::array::ArrayRef as ArrowArrayRef; use nohash_hasher::IntMap; use parking_lot::RwLock; @@ -321,17 +321,6 @@ impl LatestAtResults { .component_batch_raw(component_name) } - /// Returns the raw data for the specified component. - #[inline] - pub fn component_batch_raw_arrow2( - &self, - component_name: &ComponentName, - ) -> Option> { - self.components - .get(component_name) - .and_then(|unit| unit.component_batch_raw_arrow2(component_name)) - } - /// Returns the deserialized data for the specified component. /// /// Logs at the specified `log_level` if the data cannot be deserialized. @@ -372,7 +361,7 @@ impl LatestAtResults { log_level: re_log::Level, component_name: &ComponentName, instance_index: usize, - ) -> Option> { + ) -> Option { self.components.get(component_name).and_then(|unit| { self.ok_or_log_err( log_level, @@ -390,7 +379,7 @@ impl LatestAtResults { &self, component_name: &ComponentName, instance_index: usize, - ) -> Option> { + ) -> Option { self.component_instance_raw_with_log_level( re_log::Level::Error, component_name, @@ -404,7 +393,7 @@ impl LatestAtResults { &self, component_name: &ComponentName, instance_index: usize, - ) -> Option> { + ) -> Option { self.components.get(component_name).and_then(|unit| { unit.component_instance_raw(component_name, instance_index)? .ok() @@ -458,7 +447,7 @@ impl LatestAtResults { &self, log_level: re_log::Level, component_name: &ComponentName, - ) -> Option> { + ) -> Option { self.components.get(component_name).and_then(|unit| { self.ok_or_log_err( log_level, @@ -472,10 +461,7 @@ impl LatestAtResults { /// /// Returns an error if the underlying batch is not of unit length. #[inline] - pub fn component_mono_raw( - &self, - component_name: &ComponentName, - ) -> Option> { + pub fn component_mono_raw(&self, component_name: &ComponentName) -> Option { self.component_mono_raw_with_log_level(re_log::Level::Error, component_name) } @@ -486,7 +472,7 @@ impl LatestAtResults { pub fn component_mono_raw_quiet( &self, component_name: &ComponentName, - ) -> Option> { + ) -> Option { self.components .get(component_name) .and_then(|unit| unit.component_mono_raw(component_name)?.ok()) diff --git a/rerun_py/Cargo.toml b/rerun_py/Cargo.toml index 646e2b86b1d3..35c728da2a64 100644 --- a/rerun_py/Cargo.toml +++ b/rerun_py/Cargo.toml @@ -75,7 +75,6 @@ re_ws_comms = { workspace = true, optional = true } arrow = { workspace = true, features = ["pyarrow"] } -arrow2 = { workspace = true, features = ["io_ipc", "io_print", "arrow"] } crossbeam.workspace = true document-features.workspace = true itertools.workspace = true diff --git a/rerun_py/src/arrow.rs b/rerun_py/src/arrow.rs index 16dc761d3066..ef690e40db95 100644 --- a/rerun_py/src/arrow.rs +++ b/rerun_py/src/arrow.rs @@ -3,21 +3,21 @@ use std::borrow::Cow; use arrow::{ - array::{make_array, ArrayData, ArrayRef as ArrowArrayRef}, + array::{ + make_array, ArrayData as ArrowArrayData, ArrayRef as ArrowArrayRef, + ListArray as ArrowListArray, + }, + buffer::OffsetBuffer as ArrowOffsetBuffer, + datatypes::Field as ArrowField, pyarrow::PyArrowType, }; -use arrow2::{ - array::{Array, ListArray}, - datatypes::Field, - offset::Offsets, -}; use pyo3::{ exceptions::PyRuntimeError, types::{PyAnyMethods, PyDict, PyDictMethods, PyString}, Bound, PyAny, PyResult, }; -use re_arrow_util::Arrow2ArrayDowncastRef as _; +use re_arrow_util::ArrowArrayDowncastRef as _; use re_chunk::{Chunk, ChunkError, ChunkId, PendingRow, RowId, TimeColumn, TransportChunk}; use re_log_types::TimePoint; use re_sdk::{external::nohash_hasher::IntMap, ComponentDescriptor, EntityPath, Timeline}; @@ -51,29 +51,26 @@ pub fn descriptor_to_rust(component_descr: &Bound<'_, PyAny>) -> PyResult, component_descr: &ComponentDescriptor, -) -> PyResult<(Box, Field)> { - let py_array: PyArrowType = arrow_array.extract()?; - let arr1_array = make_array(py_array.0); - - let data = arr1_array.to_data(); - let arr2_array = arrow2::array::from_data(&data); +) -> PyResult<(ArrowArrayRef, ArrowField)> { + let py_array: PyArrowType = arrow_array.extract()?; + let array = make_array(py_array.0); - let datatype = arr2_array.data_type().to_logical_type().clone(); + let datatype = array.data_type(); let metadata = TransportChunk::field_metadata_component_descriptor(component_descr); - let field = Field::new( + let field = ArrowField::new( component_descr.component_name.to_string(), datatype.clone(), true, ) - .with_metadata(metadata.into_iter().collect()); // TODO(#3741) + .with_metadata(metadata); - Ok((arr2_array, field)) + Ok((array, field)) } /// Build a [`PendingRow`] given a '**kwargs'-style dictionary of component arrays. @@ -90,7 +87,7 @@ pub fn build_row_from_components( let component_descr = descriptor_to_rust(&component_descr)?; let (list_array, _field) = array_to_rust(&array, &component_descr)?; - components.insert(component_descr, list_array.into()); + components.insert(component_descr, list_array); } Ok(PendingRow { @@ -110,7 +107,7 @@ pub fn build_chunk_from_components( let chunk_id = ChunkId::new(); // Extract the timeline data - let (arrays, fields): (Vec>, Vec) = itertools::process_results( + let (arrays, fields): (Vec, Vec) = itertools::process_results( timelines.iter().map(|(name, array)| { let py_name = name.downcast::()?; let name: std::borrow::Cow<'_, str> = py_name.extract()?; @@ -126,18 +123,18 @@ pub fn build_chunk_from_components( let timeline_data = TimeColumn::read_array(&ArrowArrayRef::from(array)).map_err(|err| { ChunkError::Malformed { - reason: format!("Invalid timeline {}: {err}", field.name), + reason: format!("Invalid timeline {}: {err}", field.name()), } })?; let timeline = match field.data_type() { - arrow2::datatypes::DataType::Int64 => { - Ok(Timeline::new_sequence(field.name.clone())) + arrow::datatypes::DataType::Int64 => { + Ok(Timeline::new_sequence(field.name().clone())) } - arrow2::datatypes::DataType::Timestamp(_, _) => { - Ok(Timeline::new_temporal(field.name.clone())) + arrow::datatypes::DataType::Timestamp(_, _) => { + Ok(Timeline::new_temporal(field.name().clone())) } _ => Err(ChunkError::Malformed { - reason: format!("Invalid data_type for timeline: {}", field.name), + reason: format!("Invalid data_type for timeline: {}", field.name()), }), }?; Ok((timeline, timeline_data)) @@ -151,7 +148,7 @@ pub fn build_chunk_from_components( .collect(); // Extract the component data - let (arrays, fields): (Vec>, Vec) = itertools::process_results( + let (arrays, fields): (Vec, Vec) = itertools::process_results( components_per_descr.iter().map(|(component_descr, array)| { array_to_rust(&array, &descriptor_to_rust(&component_descr)?) }), @@ -162,22 +159,20 @@ pub fn build_chunk_from_components( .into_iter() .zip(fields) .map(|(value, field)| { - let batch = if let Some(batch) = value.downcast_array2_ref::>() { + let batch = if let Some(batch) = value.downcast_array_ref::() { batch.clone() } else { - let offsets = Offsets::try_from_lengths(std::iter::repeat(1).take(value.len())) - .map_err(|err| ChunkError::Malformed { - reason: format!("Failed to create offsets: {err}"), - })?; - let data_type = ListArray::::default_datatype(value.data_type().clone()); - ListArray::::try_new(data_type, offsets.into(), value, None).map_err( - |err| ChunkError::Malformed { + let offsets = + ArrowOffsetBuffer::from_lengths(std::iter::repeat(1).take(value.len())); + let field = ArrowField::new("item", value.data_type().clone(), true).into(); + ArrowListArray::try_new(field, offsets, value, None).map_err(|err| { + ChunkError::Malformed { reason: format!("Failed to wrap in List array: {err}"), - }, - )? + } + })? }; - Ok((ComponentDescriptor::new(field.name), batch)) + Ok((ComponentDescriptor::new(field.name().clone()), batch)) }) .collect(); diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs index a0b4a97a1674..8298c2f5108b 100644 --- a/rerun_py/src/python_bridge.rs +++ b/rerun_py/src/python_bridge.rs @@ -1136,9 +1136,9 @@ fn log_arrow_msg( /// ------ /// entity_path: `str` /// The entity path to log the chunk to. -/// timelines: `Dict[str, Arrow2PrimitiveArray]` +/// timelines: `Dict[str, arrow::Int64Array]` /// A dictionary mapping timeline names to their values. -/// components: `Dict[str, ArrowListArray]` +/// components: `Dict[str, arrow::ListArray]` /// A dictionary mapping component names to their values. #[pyfunction] #[pyo3(signature = ( diff --git a/rerun_py/src/video.rs b/rerun_py/src/video.rs index 0ada08d02c29..5a5c0e39222b 100644 --- a/rerun_py/src/video.rs +++ b/rerun_py/src/video.rs @@ -2,7 +2,7 @@ use pyo3::{exceptions::PyRuntimeError, pyfunction, Bound, PyAny, PyResult}; -use re_arrow_util::Arrow2ArrayDowncastRef as _; +use re_arrow_util::ArrowArrayDowncastRef as _; use re_sdk::ComponentDescriptor; use re_video::VideoLoadError; @@ -28,8 +28,8 @@ pub fn asset_video_read_frame_timestamps_ns( let video_bytes_arrow_array = array_to_rust(video_bytes_arrow_array, &component_descr)?.0; let video_bytes_arrow_uint8_array = video_bytes_arrow_array - .downcast_array2_ref::>() - .and_then(|arr| arr.values().downcast_array2_ref::()) + .downcast_array_ref::() + .and_then(|arr| arr.values().downcast_array_ref::()) .ok_or_else(|| { PyRuntimeError::new_err(format!( "Expected arrow array to be a list with a single uint8 array, instead it has the datatype {:?}", @@ -37,7 +37,7 @@ pub fn asset_video_read_frame_timestamps_ns( )) })?; - let video_bytes = video_bytes_arrow_uint8_array.values().as_slice(); + let video_bytes = video_bytes_arrow_uint8_array.values().as_ref(); let Some(media_type) = media_type.or_else(|| infer::Infer::new().get(video_bytes).map(|v| v.mime_type())) From 828e317c51cb887d4bee25d3155d3314fba2572e Mon Sep 17 00:00:00 2001 From: Zeljko Mihaljcic <7150613+zehiko@users.noreply.github.com> Date: Fri, 17 Jan 2025 13:30:34 +0100 Subject: [PATCH 5/5] define Catalog fields name in the protobuf spec crate and use them in the catalog view (#8710) This fixes the broken catalog view, regression was caused by recent renaming of fields on the SN side. Also there was 1 tiny change related to arrow migration (representation of Timestamp array) that needed to be changed as well. --- crates/store/re_grpc_client/src/lib.rs | 25 ++++++++++++++++--------- crates/store/re_protos/src/lib.rs | 12 ++++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs index 487d27098508..be5fa4ba2f99 100644 --- a/crates/store/re_grpc_client/src/lib.rs +++ b/crates/store/re_grpc_client/src/lib.rs @@ -22,7 +22,8 @@ use re_protos::{ common::v0::RecordingId, remote_store::v0::{ storage_node_client::StorageNodeClient, CatalogFilter, FetchRecordingRequest, - QueryCatalogRequest, + QueryCatalogRequest, CATALOG_APP_ID_FIELD_NAME, CATALOG_ID_FIELD_NAME, + CATALOG_START_TIME_FIELD_NAME, }, }; use re_types::{ @@ -283,27 +284,33 @@ pub fn store_info_from_catalog_chunk( let (_field, data) = tc .components() - .find(|(f, _)| f.name() == "application_id") + .find(|(f, _)| f.name() == CATALOG_APP_ID_FIELD_NAME) .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed { - reason: "no application_id field found".to_owned(), + reason: "no {CATALOG_APP_ID_FIELD_NAME} field found".to_owned(), }))?; let app_id = data .downcast_array_ref::() .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed { - reason: format!("application_id must be a utf8 array: {:?}", tc.schema_ref()), + reason: format!( + "{CATALOG_APP_ID_FIELD_NAME} must be a utf8 array: {:?}", + tc.schema_ref() + ), }))? .value(0); let (_field, data) = tc .components() - .find(|(f, _)| f.name() == "start_time") + .find(|(f, _)| f.name() == CATALOG_START_TIME_FIELD_NAME) .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed { - reason: "no start_time field found".to_owned(), + reason: "no {CATALOG_START_TIME_FIELD_NAME}} field found".to_owned(), }))?; let start_time = data - .downcast_array_ref::() + .downcast_array_ref::() .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed { - reason: format!("start_time must be an int64 array: {:?}", tc.schema_ref()), + reason: format!( + "{CATALOG_START_TIME_FIELD_NAME} must be a Timestamp array: {:?}", + tc.schema_ref() + ), }))? .value(0); @@ -485,7 +492,7 @@ async fn stream_catalog_async( )))?; let recording_uri_arrays: Vec = chunk - .iter_slices::("id".into()) + .iter_slices::(CATALOG_ID_FIELD_NAME.into()) .map(|id| { let rec_id = &id[0]; // each component batch is of length 1 i.e. single 'id' value diff --git a/crates/store/re_protos/src/lib.rs b/crates/store/re_protos/src/lib.rs index e41036697752..df3a13b66375 100644 --- a/crates/store/re_protos/src/lib.rs +++ b/crates/store/re_protos/src/lib.rs @@ -48,8 +48,20 @@ pub mod log_msg { /// Generated types for the remote store gRPC service API v0. pub mod remote_store { + pub mod v0 { pub use crate::v0::rerun_remote_store_v0::*; + + /// Recording catalog mandatory field names. All mandatory metadata fields are prefixed + /// with "rerun_" to avoid conflicts with user-defined fields. + pub const CATALOG_ID_FIELD_NAME: &str = "rerun_recording_id"; + pub const CATALOG_APP_ID_FIELD_NAME: &str = "rerun_application_id"; + pub const CATALOG_START_TIME_FIELD_NAME: &str = "rerun_start_time"; + pub const CATALOG_DESCRIPTION_FIELD_NAME: &str = "rerun_description"; + pub const CATALOG_RECORDING_TYPE_FIELD_NAME: &str = "rerun_recording_type"; + pub const CATALOG_STORAGE_URL_FIELD_NAME: &str = "rerun_storage_url"; + pub const CATALOG_REGISTRATION_TIME_FIELD_NAME: &str = "rerun_registration_time"; + pub const CATALOG_ROW_ID_FIELD_NAME: &str = "rerun_row_id"; } }