Skip to content

Commit

Permalink
Variant data type deserialization, and an example
Browse files Browse the repository at this point in the history
  • Loading branch information
slvrtrn committed Oct 31, 2024
1 parent b6053ec commit 352c6d5
Show file tree
Hide file tree
Showing 5 changed files with 258 additions and 50 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ required-features = ["rustls-tls"]
name = "data_types_derive_simple"
required-features = ["time", "uuid"]

[[example]]
name = "data_types_variant"
required-features = ["time"]

[profile.release]
debug = true

Expand Down
170 changes: 170 additions & 0 deletions examples/data_types_variant.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
use clickhouse_derive::Row;
use serde::{Deserialize, Serialize};

use clickhouse::sql::Identifier;
use clickhouse::{error::Result, Client};

// See also: https://clickhouse.com/docs/en/sql-reference/data-types/variant

#[tokio::main]
async fn main() -> Result<()> {
    let client = Client::default().with_url("http://localhost:8123");
    let table_name = "chrs_data_types_variant";

    // Regardless of the order in which the inner types are listed in the DDL below,
    // this particular Variant will always be sorted as follows:
    // Variant(Array(UInt16), Bool, Date, FixedString(6), Float32, Float64, Int128, Int16, Int32, Int64, Int8, String, UInt128, UInt16, UInt32, UInt64, UInt8)
    let create_table_sql = "
CREATE OR REPLACE TABLE ?
(
`id` UInt64,
`var` Variant(
Array(UInt16),
Bool,
Date,
FixedString(6),
Float32, Float64,
Int128, Int16, Int32, Int64, Int8,
String,
UInt128, UInt16, UInt32, UInt64, UInt8
)
)
ENGINE = MergeTree
ORDER BY id";

    let create_table = client
        .query(create_table_sql)
        .bind(Identifier(table_name))
        .with_option("allow_experimental_variant_type", "1")
        // Only needed because this Variant mixes similar types
        // (the various Int/UInt widths, Float32/Float64, String/FixedString).
        // Drop this option when the definition contains no similar types.
        .with_option("allow_suspicious_variant_types", "1");
    create_table.execute().await?;

    // Write one row per Variant alternative.
    let mut insert = client.insert(table_name)?;
    for row in get_rows() {
        insert.write(&row).await?;
    }
    insert.end().await?;

    // Read everything back and pretty-print it.
    let select = client
        .query("SELECT ?fields FROM ?")
        .bind(Identifier(table_name));
    let rows = select.fetch_all::<MyRow>().await?;

    println!("{rows:#?}");
    Ok(())
}

// Builds the sample data set: one row per alternative of the Variant column,
// with ids assigned sequentially starting from 1.
fn get_rows() -> Vec<MyRow> {
    let new_year_2021 = time::Date::from_calendar_date(2021, time::Month::January, 1).unwrap();

    // The position in this list determines the row id (first entry => id 1).
    let payloads = vec![
        MyRowVariant::Array(vec![1, 2]),
        MyRowVariant::Boolean(true),
        MyRowVariant::Date(new_year_2021),
        MyRowVariant::FixedString(*b"foobar"),
        MyRowVariant::Float32(100.5),
        MyRowVariant::Float64(200.1),
        MyRowVariant::Int8(2),
        MyRowVariant::Int16(3),
        MyRowVariant::Int32(4),
        MyRowVariant::Int64(5),
        MyRowVariant::Int128(6),
        MyRowVariant::String("my_string".to_string()),
        MyRowVariant::UInt8(7),
        MyRowVariant::UInt16(8),
        MyRowVariant::UInt32(9),
        MyRowVariant::UInt64(10),
        MyRowVariant::UInt128(11),
    ];

    (1u64..)
        .zip(payloads)
        .map(|(id, var)| MyRow { id, var })
        .collect()
}

// As the inner Variant types are _always_ sorted alphabetically,
// the enum variants must be declared in _exactly_ the same order.
//
// Rust enum variant names are irrelevant, only the order of the types matters.
// This enum represents Variant(Array(UInt16), Bool, Date, FixedString(6), Float32, Float64, Int128, Int16, Int32, Int64, Int8, String, UInt128, UInt16, UInt32, UInt64, UInt8)
//
// Each payload uses the Rust type with the same width _and_ signedness as the
// ClickHouse type it stands for; a signed payload for an unsigned column would
// reject or misread values in the upper half of the unsigned range.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
enum MyRowVariant {
    // Array(UInt16)
    Array(Vec<u16>),
    // Bool
    Boolean(bool),
    // Date; attributes should work in this case, too
    #[serde(with = "clickhouse::serde::time::date")]
    Date(time::Date),
    // FixedString(6); NB: by default, fetched as raw bytes
    FixedString([u8; 6]),
    Float32(f32),
    Float64(f64),
    // Note the alphabetical order of the type names: Int128 < Int16 < Int32 < Int64 < Int8
    Int128(i128),
    Int16(i16),
    Int32(i32),
    Int64(i64),
    Int8(i8),
    String(String),
    // Same alphabetical ordering for the unsigned family
    UInt128(u128),
    UInt16(u16),
    UInt32(u32),
    UInt64(u64),
    UInt8(u8),
}

// One row of the example table created in `main`.
// Used both for inserting (`Serialize`) and for fetching (`Deserialize`).
#[derive(Debug, PartialEq, Row, Serialize, Deserialize)]
struct MyRow {
// The `id` UInt64 column (the table's ORDER BY key).
id: u64,
// The `var` Variant(...) column.
var: MyRowVariant,
}
2 changes: 2 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub enum Error {
InvalidUtf8Encoding(#[from] Utf8Error),
#[error("tag for enum is not valid")]
InvalidTagEncoding(usize),
#[error("max number of types in the Variant data type is 255, got {0}")]
VariantDiscriminatorIsOutOfBound(usize),
#[error("a custom error message from serde: {0}")]
Custom(String),
#[error("bad response: {0}")]
Expand Down
18 changes: 13 additions & 5 deletions src/rowbinary/ser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,20 @@ impl<'a, B: BufMut> Serializer for &'a mut RowBinarySerializer<B> {
// Serializes a newtype enum variant as a Variant value: the variant index is
// written as a single discriminator byte, followed by the serialized payload.
// Returns `Error::VariantDiscriminatorIsOutOfBound` when the index is above 255.
//
// NOTE(review): this span looks like a flattened diff hunk that contains both the
// removed and the added lines without +/- markers — confirm against the repository.
#[inline]
fn serialize_newtype_variant<T: Serialize + ?Sized>(
self,
// NOTE(review): presumably the pre-change parameter list (removed lines).
name: &'static str,
_variant_index: u32,
variant: &'static str,
_value: &T,
// NOTE(review): presumably the post-change parameter list (added lines).
_name: &'static str,
variant_index: u32,
_variant: &'static str,
value: &T,
) -> Result<()> {
// NOTE(review): presumably the pre-change body (removed line).
panic!("newtype variant types are unsupported: `{name}::{variant}`");
// Max number of types in the Variant data type is 255
// See also: https://github.com/ClickHouse/ClickHouse/issues/54864
if variant_index > 255 {
return Err(Error::VariantDiscriminatorIsOutOfBound(
variant_index as usize,
));
}
// The guard above ensures the index fits in one byte, so the cast cannot truncate.
self.buffer.put_u8(variant_index as u8);
// The payload follows the discriminator, encoded by this same serializer.
value.serialize(self)
}

#[inline]
Expand Down
114 changes: 69 additions & 45 deletions tests/it/variant.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#![cfg(feature = "time")]

use clickhouse::Row;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use time::Month::January;

use clickhouse::Row;

// See also: https://clickhouse.com/docs/en/sql-reference/data-types/variant

#[tokio::test]
Expand All @@ -12,7 +13,7 @@ async fn variant_data_type() {

// NB: Inner Variant types are _always_ sorted alphabetically,
// and should be defined in _exactly_ the same order in the enum.
#[derive(Debug, PartialEq, Deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize)]
enum MyRowVariant {
Array(Vec<i16>),
Boolean(bool),
Expand All @@ -35,7 +36,7 @@ async fn variant_data_type() {
UInt8(i8),
}

#[derive(Debug, Row, Deserialize)]
#[derive(Debug, PartialEq, Row, Serialize, Deserialize)]
struct MyRow {
var: MyRowVariant,
}
Expand Down Expand Up @@ -67,55 +68,78 @@ async fn variant_data_type() {
.await
.unwrap();

let rows = vec![
MyRow {
var: MyRowVariant::Array(vec![1, 2]),
},
MyRow {
var: MyRowVariant::Boolean(true),
},
MyRow {
var: MyRowVariant::Date(time::Date::from_calendar_date(2021, January, 1).unwrap()),
},
MyRow {
var: MyRowVariant::FixedString(*b"foobar"),
},
MyRow {
var: MyRowVariant::Float32(100.5),
},
MyRow {
var: MyRowVariant::Float64(200.1),
},
MyRow {
var: MyRowVariant::Int8(2),
},
MyRow {
var: MyRowVariant::Int16(3),
},
MyRow {
var: MyRowVariant::Int32(4),
},
MyRow {
var: MyRowVariant::Int64(5),
},
MyRow {
var: MyRowVariant::Int128(6),
},
MyRow {
var: MyRowVariant::String("my_string".to_string()),
},
MyRow {
var: MyRowVariant::UInt8(7),
},
MyRow {
var: MyRowVariant::UInt16(8),
},
MyRow {
var: MyRowVariant::UInt32(9),
},
MyRow {
var: MyRowVariant::UInt64(10),
},
MyRow {
var: MyRowVariant::UInt128(11),
},
];

// Write to the table.
client
.query(
"
INSERT INTO test_var VALUES
([1, 2]),
(true),
('2021-01-01' :: Date),
('foobar' :: FixedString(6)),
(100.5 :: Float32), (200.1 :: Float64),
(2 :: Int8), (3 :: Int16), (4 :: Int32), (5 :: Int64), (6 :: Int128),
('my_string' :: String),
(7 :: UInt8), (8 :: UInt16), (9 :: UInt32), (10 :: UInt64), (11 :: UInt128)",
)
.execute()
.await
.unwrap();
let mut insert = client.insert("test_var").unwrap();
for row in &rows {
insert.write(row).await.unwrap();
}
insert.end().await.unwrap();

// Read from the table.
let rows = client
let result_rows = client
.query("SELECT ?fields FROM test_var")
.fetch_all::<MyRow>()
.await
.unwrap();

let expected = vec![
MyRowVariant::Array(vec![1, 2]),
MyRowVariant::Boolean(true),
MyRowVariant::Date(time::Date::from_calendar_date(2021, January, 1).unwrap()),
MyRowVariant::FixedString(*b"foobar"),
MyRowVariant::Float32(100.5),
MyRowVariant::Float64(200.1),
MyRowVariant::Int8(2),
MyRowVariant::Int16(3),
MyRowVariant::Int32(4),
MyRowVariant::Int64(5),
MyRowVariant::Int128(6),
MyRowVariant::String("my_string".to_string()),
MyRowVariant::UInt8(7),
MyRowVariant::UInt16(8),
MyRowVariant::UInt32(9),
MyRowVariant::UInt64(10),
MyRowVariant::UInt128(11),
];

assert_eq!(rows.len(), expected.len());
assert_eq!(result_rows.len(), rows.len());
rows.iter()
.zip(expected.iter())
.for_each(|(row, expected)| {
assert_eq!(row.var, *expected);
.zip(result_rows.iter())
.for_each(|(row, result_row)| {
assert_eq!(row, result_row);
});
}

0 comments on commit 352c6d5

Please sign in to comment.