Skip to content

Commit

Permalink
Fix bug in field level metadata matching code (apache#8286)
Browse files Browse the repository at this point in the history
* Fix bug in field level metadata matching code

* improve comment

* single map
  • Loading branch information
alamb committed Nov 21, 2023
1 parent 04c77ca commit e50b84d
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 18 deletions.
16 changes: 6 additions & 10 deletions datafusion/physical-plan/src/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,16 +257,12 @@ fn get_field_metadata(
e: &Arc<dyn PhysicalExpr>,
input_schema: &Schema,
) -> Option<HashMap<String, String>> {
let name = if let Some(column) = e.as_any().downcast_ref::<Column>() {
column.name()
} else {
return None;
};

input_schema
.field_with_name(name)
.ok()
.map(|f| f.metadata().clone())
// Look up field by index in schema (not NAME as there can be more than one
// column with the same name)
e.as_any()
.downcast_ref::<Column>()
.map(|column| input_schema.field(column.index()).metadata())
.cloned()
}

fn stats_projection(
Expand Down
44 changes: 36 additions & 8 deletions datafusion/sqllogictest/src/test_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use datafusion::{
};
use datafusion_common::DataFusionError;
use log::info;
use std::collections::HashMap;
use std::fs::File;
use std::io::Write;
use std::path::Path;
Expand All @@ -57,8 +58,8 @@ impl TestContext {
}
}

/// Create a SessionContext, configured for the specific test, if
/// possible.
/// Create a SessionContext, configured for the specific sqllogictest
/// test(.slt file) , if possible.
///
/// If `None` is returned (e.g. because some needed feature is not
/// enabled), the file should be skipped
Expand All @@ -67,7 +68,7 @@ impl TestContext {
// hardcode target partitions so plans are deterministic
.with_target_partitions(4);

let test_ctx = TestContext::new(SessionContext::new_with_config(config));
let mut test_ctx = TestContext::new(SessionContext::new_with_config(config));

let file_name = relative_path.file_name().unwrap().to_str().unwrap();
match file_name {
Expand All @@ -86,10 +87,8 @@ impl TestContext {
"avro.slt" => {
#[cfg(feature = "avro")]
{
let mut test_ctx = test_ctx;
info!("Registering avro tables");
register_avro_tables(&mut test_ctx).await;
return Some(test_ctx);
}
#[cfg(not(feature = "avro"))]
{
Expand All @@ -99,10 +98,11 @@ impl TestContext {
}
"joins.slt" => {
info!("Registering partition table tables");

let mut test_ctx = test_ctx;
register_partition_table(&mut test_ctx).await;
return Some(test_ctx);
}
"metadata.slt" => {
info!("Registering metadata table tables");
register_metadata_tables(test_ctx.session_ctx()).await;
}
_ => {
info!("Using default SessionContext");
Expand Down Expand Up @@ -299,3 +299,31 @@ fn table_with_many_types() -> Arc<dyn TableProvider> {
let provider = MemTable::try_new(Arc::new(schema), vec![vec![batch]]).unwrap();
Arc::new(provider)
}

/// Registers a table_with_metadata that contains both field level and Table level metadata
pub async fn register_metadata_tables(ctx: &SessionContext) {
let id = Field::new("id", DataType::Int32, true).with_metadata(HashMap::from([(
String::from("metadata_key"),
String::from("the id field"),
)]));
let name = Field::new("name", DataType::Utf8, true).with_metadata(HashMap::from([(
String::from("metadata_key"),
String::from("the name field"),
)]));

let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([(
String::from("metadata_key"),
String::from("the entire schema"),
)]));

let batch = RecordBatch::try_new(
Arc::new(schema),
vec![
Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _,
Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")])) as _,
],
)
.unwrap();

ctx.register_batch("table_with_metadata", batch).unwrap();
}
62 changes: 62 additions & 0 deletions datafusion/sqllogictest/test_files/metadata.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

##########
## Tests for tables that has both metadata on each field as well as metadata on
## the schema itself.
##########

## Note that table_with_metadata is defined using Rust code
## in the test harness as there is no way to define schema
## with metadata in SQL.

query IT
select * from table_with_metadata;
----
1 NULL
NULL bar
3 baz

query I rowsort
SELECT (
SELECT id FROM table_with_metadata
) UNION (
SELECT id FROM table_with_metadata
);
----
1
3
NULL

query I rowsort
SELECT "data"."id"
FROM
(
(SELECT "id" FROM "table_with_metadata")
UNION
(SELECT "id" FROM "table_with_metadata")
) as "data",
(
SELECT "id" FROM "table_with_metadata"
) as "samples"
WHERE "data"."id" = "samples"."id";
----
1
3

statement ok
drop table table_with_metadata;

0 comments on commit e50b84d

Please sign in to comment.