Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!(ffi): expose metadata in SchemaEngineVisitor ffi api #659

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 31 additions & 4 deletions ffi/examples/read-table/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,23 @@ void print_list(SchemaBuilder* builder, uintptr_t list_id, int indent, int paren
}
}

void print_metadata(const char *name, const CStringMap* metadata)
scovich marked this conversation as resolved.
Show resolved Hide resolved
{
#ifdef VERBOSE
char* key_str = "delta.columnMapping.physicalName";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NOTE: This is an internal Delta column that should not actually be user-visible, see #583. That needs to be fixed in kernel proper (not here in the FFI layer), but when it does get fixed this example will stop working.

KernelStringSlice key = { key_str, strlen(key_str) };
char* value = get_from_map(metadata, key, allocate_string);
if (value) {
printf("Physical name of %s is %s\n", name, value);
free(value);
} else {
printf("No physical name\n");
}
#else
(void)metadata;
#endif
}

// declare all our visitor methods
uintptr_t make_field_list(void* data, uintptr_t reserve)
{
Expand All @@ -109,11 +126,13 @@ void visit_struct(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
char* name_ptr = allocate_string(name);
PRINT_CHILD_VISIT("struct", name_ptr, sibling_list_id, "Children", child_list_id);
print_metadata(name_ptr, metadata);
SchemaItem* struct_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "struct", is_nullable);
struct_item->children = child_list_id;
}
Expand All @@ -123,12 +142,14 @@ void visit_array(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
char* name_ptr = malloc(sizeof(char) * (name.len + 22));
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
snprintf(name_ptr + name.len, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
print_metadata(name_ptr, metadata);
PRINT_CHILD_VISIT("array", name_ptr, sibling_list_id, "Types", child_list_id);
SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array", is_nullable);
array_item->children = child_list_id;
Expand All @@ -139,12 +160,14 @@ void visit_map(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
uintptr_t child_list_id)
{
SchemaBuilder* builder = data;
char* name_ptr = malloc(sizeof(char) * (name.len + 22));
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
snprintf(name_ptr + name.len, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
print_metadata(name_ptr, metadata);
PRINT_CHILD_VISIT("map", name_ptr, sibling_list_id, "Types", child_list_id);
SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map", is_nullable);
map_item->children = child_list_id;
Expand All @@ -155,13 +178,15 @@ void visit_decimal(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
uint8_t precision,
uint8_t scale)
{
SchemaBuilder* builder = data;
char* name_ptr = allocate_string(name);
char* type = malloc(19 * sizeof(char));
snprintf(type, 19, "decimal(%u)(%d)", precision, scale);
print_metadata(name_ptr, metadata);
PRINT_NO_CHILD_VISIT(type, name_ptr, sibling_list_id);
add_to_list(&builder->lists[sibling_list_id], name_ptr, type, is_nullable);
}
Expand All @@ -171,18 +196,20 @@ void visit_simple_type(
uintptr_t sibling_list_id,
struct KernelStringSlice name,
bool is_nullable,
const CStringMap * metadata,
char* type)
{
SchemaBuilder* builder = data;
char* name_ptr = allocate_string(name);
print_metadata(name_ptr, metadata);
PRINT_NO_CHILD_VISIT(type, name_ptr, sibling_list_id);
add_to_list(&builder->lists[sibling_list_id], name_ptr, type, is_nullable);
}

#define DEFINE_VISIT_SIMPLE_TYPE(typename) \
void visit_##typename(void* data, uintptr_t sibling_list_id, struct KernelStringSlice name, bool is_nullable)\
{ \
visit_simple_type(data, sibling_list_id, name, is_nullable, #typename); \
#define DEFINE_VISIT_SIMPLE_TYPE(typename) \
void visit_##typename(void* data, uintptr_t sibling_list_id, struct KernelStringSlice name, bool is_nullable, const CStringMap * metadata)\
{ \
visit_simple_type(data, sibling_list_id, name, is_nullable, metadata, #typename); \
}

DEFINE_VISIT_SIMPLE_TYPE(string)
Expand Down
2 changes: 1 addition & 1 deletion ffi/src/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ type CScanCallback = extern "C" fn(
);

pub struct CStringMap {
values: HashMap<String, String>,
pub values: HashMap<String, String>,
scovich marked this conversation as resolved.
Show resolved Hide resolved
}

#[no_mangle]
Expand Down
47 changes: 42 additions & 5 deletions ffi/src/schema.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::os::raw::c_void;

use crate::scan::CStringMap;
use crate::{handle::Handle, kernel_string_slice, KernelStringSlice, SharedSnapshot};
use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType};

/// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own
/// representation of a schema from a particular schema within kernel.
///
Expand All @@ -28,7 +30,6 @@ use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructTy
/// that element's (already-visited) children.
/// 4. The [`visit_schema`] method returns the id of the list of top-level columns
// WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods
// TODO: struct field metadata
#[repr(C)]
pub struct EngineSchemaVisitor {
/// opaque state pointer
Expand All @@ -44,6 +45,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
child_list_id: usize,
),

Expand All @@ -54,6 +56,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
child_list_id: usize,
),

Expand All @@ -65,6 +68,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
child_list_id: usize,
),

Expand All @@ -74,6 +78,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
precision: u8,
scale: u8,
),
Expand All @@ -84,6 +89,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `long` belonging to the list identified by `sibling_list_id`.
Expand All @@ -92,6 +98,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit an `integer` belonging to the list identified by `sibling_list_id`.
Expand All @@ -100,6 +107,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `short` belonging to the list identified by `sibling_list_id`.
Expand All @@ -108,6 +116,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `byte` belonging to the list identified by `sibling_list_id`.
Expand All @@ -116,6 +125,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `float` belonging to the list identified by `sibling_list_id`.
Expand All @@ -124,6 +134,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `double` belonging to the list identified by `sibling_list_id`.
Expand All @@ -132,6 +143,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `boolean` belonging to the list identified by `sibling_list_id`.
Expand All @@ -140,6 +152,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit `binary` belonging to the list identified by `sibling_list_id`.
Expand All @@ -148,6 +161,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `date` belonging to the list identified by `sibling_list_id`.
Expand All @@ -156,6 +170,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `timestamp` belonging to the list identified by `sibling_list_id`.
Expand All @@ -164,6 +179,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),

/// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`.
Expand All @@ -172,6 +188,7 @@ pub struct EngineSchemaVisitor {
sibling_list_id: usize,
name: KernelStringSlice,
is_nullable: bool,
metadata: &CStringMap,
),
}

Expand All @@ -193,10 +210,14 @@ pub unsafe extern "C" fn visit_schema(
fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize {
let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len());
for field in s.fields() {
let metadata = CStringMap {
values: field.metadata_with_string_values(),
};
visit_schema_item(
field.name(),
field.data_type(),
field.is_nullable(),
&metadata,
visitor,
child_list_id,
);
Expand All @@ -208,12 +229,14 @@ pub unsafe extern "C" fn visit_schema(
visitor: &EngineSchemaVisitor,
at: &ArrayType,
contains_null: bool,
metadata: &CStringMap,
) -> usize {
let child_list_id = (visitor.make_field_list)(visitor.data, 1);
visit_schema_item(
"array_element",
&at.element_type,
contains_null,
metadata,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks wrong. We're passing field metadata for the array itself to the visitor for its element type -- even tho the call macro already passed it to the visit_array callback. I'm pretty sure we need to pass an empty map here, because these internal fields don't have metadata associated with them

(same for map key/value visitors below)

visitor,
child_list_id,
);
Expand All @@ -224,13 +247,22 @@ pub unsafe extern "C" fn visit_schema(
visitor: &EngineSchemaVisitor,
mt: &MapType,
value_contains_null: bool,
metadata: &CStringMap,
) -> usize {
let child_list_id = (visitor.make_field_list)(visitor.data, 2);
visit_schema_item("map_key", &mt.key_type, false, visitor, child_list_id);
visit_schema_item(
"map_key",
&mt.key_type,
false,
metadata,
visitor,
child_list_id,
);
visit_schema_item(
"map_value",
&mt.value_type,
value_contains_null,
metadata,
visitor,
child_list_id,
);
Expand All @@ -242,6 +274,7 @@ pub unsafe extern "C" fn visit_schema(
name: &str,
data_type: &DataType,
is_nullable: bool,
metadata: &CStringMap,
visitor: &EngineSchemaVisitor,
sibling_list_id: usize,
) {
Expand All @@ -251,7 +284,8 @@ pub unsafe extern "C" fn visit_schema(
visitor.data,
sibling_list_id,
kernel_string_slice!(name),
is_nullable
is_nullable,
metadata
$(, $extra_args) *
)
};
Expand All @@ -261,11 +295,14 @@ pub unsafe extern "C" fn visit_schema(
DataType::Map(mt) => {
call!(
visit_map,
visit_map_types(visitor, mt, mt.value_contains_null)
visit_map_types(visitor, mt, mt.value_contains_null, metadata)
)
}
DataType::Array(at) => {
call!(visit_array, visit_array_item(visitor, at, at.contains_null))
call!(
visit_array,
visit_array_item(visitor, at, at.contains_null, metadata)
)
}
DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => {
call!(visit_decimal, *precision, *scale)
Expand Down
Loading