Skip to content

Commit

Permalink
feat!(ffi): expose metadata in SchemaEngineVisitor ffi api (#659)
Browse files Browse the repository at this point in the history
- new feature PR: "Expose metadata in SchemaEngineVisitor ffi api"

## What changes are proposed in this pull request?

### This PR affects the following public APIs
A new argument "metadata" was added to EngineSchemaVisitor visitor
functions.


## How was this change tested?
`examples/read_table` was updated to test the change.
  • Loading branch information
kssenii authored Jan 30, 2025
1 parent 3305d3a commit 06d8dbb
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 7 deletions.
36 changes: 32 additions & 4 deletions ffi/examples/read-table/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,24 @@ void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, int paren
}
}

void print_physical_name(const char *name, const CStringMap* metadata)
{
#ifdef VERBOSE
char* key_str = "delta.columnMapping.physicalName";
KernelStringSlice key = { key_str, strlen(key_str) };
char* value = get_from_map(metadata, key, allocate_string);
if (value) {
printf("Physical name of %s is %s\n", name, value);
free(value);
} else {
printf("No physical name\n");
}
#else
(void)name;
(void)metadata;
#endif
}

// declare all our visitor methods
uintptr_t make_field_list(void* data, uintptr_t reserve)
{
Expand All @@ -109,11 +127,13 @@ void visit_struct(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
char* name_ptr = allocate_string(name);
PRINT_CHILD_VISIT("struct", name_ptr, sibling_list_id, "Children", child_list_id);
print_physical_name(name_ptr, metadata);
SchemaItem* struct_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "struct", is_nullable);
struct_item->children = child_list_id;
}
Expand All @@ -123,12 +143,14 @@ void visit_array(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
char* name_ptr = malloc(sizeof(char) * (name.len + 22));
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
snprintf(name_ptr + name.len, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
print_physical_name(name_ptr, metadata);
PRINT_CHILD_VISIT("array", name_ptr, sibling_list_id, "Types", child_list_id);
SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array", is_nullable);
array_item->children = child_list_id;
Expand All @@ -139,12 +161,14 @@ void visit_map(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
char* name_ptr = malloc(sizeof(char) * (name.len + 22));
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
snprintf(name_ptr + name.len, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
print_physical_name(name_ptr, metadata);
PRINT_CHILD_VISIT("map", name_ptr, sibling_list_id, "Types", child_list_id);
SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map", is_nullable);
map_item->children = child_list_id;
Expand All @@ -155,13 +179,15 @@ void visit_decimal(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
uint8_t precision,
uint8_t scale)
{
SchemaBuilder* builder = data;
char* name_ptr = allocate_string(name);
char* type = malloc(19 * sizeof(char));
snprintf(type, 19, "decimal(%u)(%d)", precision, scale);
print_physical_name(name_ptr, metadata);
PRINT_NO_CHILD_VISIT(type, name_ptr, sibling_list_id);
add_to_list(&builder->lists[sibling_list_id], name_ptr, type, is_nullable);
}
Expand All @@ -171,18 +197,20 @@ void visit_simple_type(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
char* type)
{
SchemaBuilder* builder = data;
char* name_ptr = allocate_string(name);
print_physical_name(name_ptr, metadata);
PRINT_NO_CHILD_VISIT(type, name_ptr, sibling_list_id);
add_to_list(&builder->lists[sibling_list_id], name_ptr, type, is_nullable);
}

#define DEFINE_VISIT_SIMPLE_TYPE(typename) \
void visit_##typename(void* data, uintptr_t sibling_list_id, struct KernelStringSlice name, bool is_nullable)\
{ \
visit_simple_type(data, sibling_list_id, name, is_nullable, #typename); \
#define DEFINE_VISIT_SIMPLE_TYPE(typename) \
void visit_##typename(void* data, uintptr_t sibling_list_id, struct KernelStringSlice name, bool is_nullable, const CStringMap * metadata)\
{ \
visit_simple_type(data, sibling_list_id, name, is_nullable, metadata, #typename); \
}

DEFINE_VISIT_SIMPLE_TYPE(string)
Expand Down
7 changes: 7 additions & 0 deletions ffi/src/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,17 @@ type CScanCallback = extern "C" fn(
partition_map: &CStringMap,
);

#[derive(Default)]
pub struct CStringMap {
values: HashMap<String, String>,
}

impl From<HashMap<String, String>> for CStringMap {
fn from(val: HashMap<String, String>) -> Self {
Self { values: val }
}
}

#[no_mangle]
/// allow probing into a CStringMap. If the specified key is in the map, kernel will call
/// allocate_fn with the value associated with the key and return the value returned from that
Expand Down
37 changes: 34 additions & 3 deletions ffi/src/schema.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::os::raw::c_void;

use crate::scan::CStringMap;
use crate::{handle::Handle, kernel_string_slice, KernelStringSlice, SharedSnapshot};
use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType};

/// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own
/// representation of a schema from a particular schema within kernel.
///
Expand All @@ -28,7 +30,6 @@ use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructTy
/// that element's (already-visited) children.
/// 4. The [`visit_schema`] method returns the id of the list of top-level columns
// WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods
// TODO: struct field metadata
#[repr(C)]
pub struct EngineSchemaVisitor {
/// opaque state pointer
Expand All @@ -44,6 +45,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
child_list_id: usize,
),

Expand All @@ -54,6 +56,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
child_list_id: usize,
),

Expand All @@ -65,6 +68,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
child_list_id: usize,
),

Expand All @@ -74,6 +78,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
precision: u8,
scale: u8,
),
Expand All @@ -84,6 +89,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `long` belonging to the list identified by `sibling_list_id`.
Expand All @@ -92,6 +98,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit an `integer` belonging to the list identified by `sibling_list_id`.
Expand All @@ -100,6 +107,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `short` belonging to the list identified by `sibling_list_id`.
Expand All @@ -108,6 +116,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `byte` belonging to the list identified by `sibling_list_id`.
Expand All @@ -116,6 +125,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `float` belonging to the list identified by `sibling_list_id`.
Expand All @@ -124,6 +134,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `double` belonging to the list identified by `sibling_list_id`.
Expand All @@ -132,6 +143,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `boolean` belonging to the list identified by `sibling_list_id`.
Expand All @@ -140,6 +152,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit `binary` belonging to the list identified by `sibling_list_id`.
Expand All @@ -148,6 +161,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `date` belonging to the list identified by `sibling_list_id`.
Expand All @@ -156,6 +170,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `timestamp` belonging to the list identified by `sibling_list_id`.
Expand All @@ -164,6 +179,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`.
Expand All @@ -172,6 +188,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),
}

Expand All @@ -197,6 +214,7 @@ pub unsafe extern "C" fn visit_schema(
field.name(),
field.data_type(),
field.is_nullable(),
&field.metadata_with_string_values().into(),
visitor,
child_list_id,
);
Expand All @@ -210,10 +228,12 @@ pub unsafe extern "C" fn visit_schema(
contains_null: bool,
) -> usize {
let child_list_id = (visitor.make_field_list)(visitor.data, 1);
let metadata = CStringMap::default();
visit_schema_item(
"array_element",
&at.element_type,
contains_null,
&metadata,
visitor,
child_list_id,
);
Expand All @@ -226,11 +246,20 @@ pub unsafe extern "C" fn visit_schema(
value_contains_null: bool,
) -> usize {
let child_list_id = (visitor.make_field_list)(visitor.data, 2);
visit_schema_item("map_key", &mt.key_type, false, visitor, child_list_id);
let metadata = CStringMap::default();
visit_schema_item(
"map_key",
&mt.key_type,
false,
&metadata,
visitor,
child_list_id,
);
visit_schema_item(
"map_value",
&mt.value_type,
value_contains_null,
&metadata,
visitor,
child_list_id,
);
Expand All @@ -242,6 +271,7 @@ pub unsafe extern "C" fn visit_schema(
name: &str,
data_type: &DataType,
is_nullable: bool,
metadata: &CStringMap,
visitor: &EngineSchemaVisitor,
sibling_list_id: usize,
) {
Expand All @@ -251,7 +281,8 @@ pub unsafe extern "C" fn visit_schema(
visitor.data,
sibling_list_id,
kernel_string_slice!(name),
is_nullable
is_nullable,
metadata
$(, $extra_args) *
)
};
Expand Down

0 comments on commit 06d8dbb

Please sign in to comment.