Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add respective json_is UDFs for JSON type #4726

Merged
merged 5 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions src/common/function/src/scalars/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@

use std::sync::Arc;
mod json_get;
mod json_is;
mod json_to_string;
mod to_json;
mod parse_json;

use json_get::{JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString};
use json_is::{
JsonIsArray, JsonIsBool, JsonIsFloat, JsonIsInt, JsonIsNull, JsonIsObject, JsonIsString,
};
use json_to_string::JsonToStringFunction;
use to_json::ToJsonFunction;
use parse_json::ParseJsonFunction;

use crate::function_registry::FunctionRegistry;

Expand All @@ -28,11 +32,19 @@ pub(crate) struct JsonFunction;
impl JsonFunction {
/// Registers each JSON scalar function listed below with `registry`, wrapping every
/// function value in an `Arc`.
pub fn register(registry: &FunctionRegistry) {
// Conversions between JSON values and strings.
// NOTE(review): both `ToJsonFunction` and `ParseJsonFunction` are registered here, while a
// later hunk of this change renames `ToJsonFunction` to `ParseJsonFunction`; one of the two
// lines is presumably left over from the rename — confirm which should remain.
registry.register(Arc::new(JsonToStringFunction));
registry.register(Arc::new(ToJsonFunction));
registry.register(Arc::new(ParseJsonFunction));

// `json_get_*`: typed lookups by path.
registry.register(Arc::new(JsonGetInt));
registry.register(Arc::new(JsonGetFloat));
registry.register(Arc::new(JsonGetString));
registry.register(Arc::new(JsonGetBool));

// `json_is_*`: type predicates generated by the `json_is!` macro.
registry.register(Arc::new(JsonIsNull));
registry.register(Arc::new(JsonIsInt));
registry.register(Arc::new(JsonIsFloat));
registry.register(Arc::new(JsonIsString));
registry.register(Arc::new(JsonIsBool));
registry.register(Arc::new(JsonIsArray));
registry.register(Arc::new(JsonIsObject));
}
}
2 changes: 1 addition & 1 deletion src/common/function/src/scalars/json/json_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
/// If the path does not exist or the value is not the type specified, return `NULL`.
macro_rules! json_get {
// e.g. name = JsonGetInt, type = Int64, rust_type = i64, doc = "Get the value from the JSONB by the given path and return it as an integer."
($name: ident, $type: ident, $rust_type: ident, $doc:expr) => {
($name:ident, $type:ident, $rust_type:ident, $doc:expr) => {
paste::paste! {
#[doc = $doc]
#[derive(Clone, Debug, Default)]
Expand Down
214 changes: 214 additions & 0 deletions src/common/function/src/scalars/json/json_is.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{self, Display};

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;

use crate::function::{Function, FunctionContext};

/// Generates a scalar function that reports whether a JSONB value is of a given JSON type.
///
/// Macro arguments:
/// - `$name`: name of the generated unit struct (e.g. `JsonIsInt`). Its snake_case form
///   (e.g. `json_is_int`) is used as the function name, and the upper-cased form is what
///   `Display` prints.
/// - `$json_type`: suffix of the `jsonb::is_*` predicate applied to every row
///   (e.g. `i64` selects `jsonb::is_i64`).
/// - `$doc`: documentation string attached to the generated struct.
///
/// The generated function accepts exactly one JSON column and returns a boolean column of
/// the same length. Rows for which `as_binary` does not yield bytes produce a null entry.
///
/// # Errors
///
/// `eval` fails with `InvalidFuncArgs` when it is not given exactly one column, and with
/// `UnsupportedInputDataType` when that column is not backed by a binary vector.
macro_rules! json_is {
    ($name:ident, $json_type:ident, $doc:expr) => {
        paste::paste! {
            // Attach the caller-supplied description so it shows up in rustdoc,
            // mirroring what the sibling `json_get!` macro does.
            #[doc = $doc]
            #[derive(Clone, Debug, Default)]
            pub struct $name;

            impl Function for $name {
                fn name(&self) -> &str {
                    stringify!([<$name:snake>])
                }

                fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
                    Ok(ConcreteDataType::boolean_datatype())
                }

                fn signature(&self) -> Signature {
                    Signature::exact(vec![ConcreteDataType::json_datatype()], Volatility::Immutable)
                }

                fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
                    ensure!(
                        columns.len() == 1,
                        InvalidFuncArgsSnafu {
                            err_msg: format!(
                                "The length of the args is not correct, expect exactly one, have: {}",
                                columns.len()
                            ),
                        }
                    );

                    let jsons = &columns[0];
                    let size = jsons.len();
                    let datatype = jsons.data_type();
                    let mut results = BooleanVectorBuilder::with_capacity(size);

                    match datatype {
                        // JSON data type uses binary vector
                        ConcreteDataType::Binary(_) => {
                            for i in 0..size {
                                let json = jsons.get_ref(i);
                                let json = json.as_binary();
                                // Only rows that expose bytes are classified; everything
                                // else becomes a null entry in the result.
                                let result = match json {
                                    Ok(Some(json)) => {
                                        Some(jsonb::[<is_ $json_type>](json))
                                    }
                                    _ => None,
                                };
                                results.push(result);
                            }
                        }
                        _ => {
                            return UnsupportedInputDataTypeSnafu {
                                function: stringify!([<$name:snake>]),
                                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
                            }
                            .fail();
                        }
                    }

                    Ok(results.to_vector())
                }
            }

            impl Display for $name {
                fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    write!(f, "{}", stringify!([<$name:snake>]).to_ascii_uppercase())
                }
            }
        }
    }
}

// One function type per JSON type. The second argument is the suffix of the `jsonb::is_*`
// predicate that the generated function applies to each row; the third is its description.
json_is!(JsonIsNull, null, "Checks if the input JSONB is null");
json_is!(
JsonIsBool,
boolean,
"Checks if the input JSONB is a boolean type JSON value"
);
json_is!(
JsonIsInt,
i64,
"Checks if the input JSONB is a integer type JSON value"
);
// Backed by `jsonb::is_number`: the unit test in this file expects this function to return
// true for the integer literal `1` as well as for `1.0`.
json_is!(
JsonIsFloat,
number,
"Checks if the input JSONB is a JSON float"
);
json_is!(
JsonIsString,
string,
"Checks if the input JSONB is a JSON string"
);
json_is!(
JsonIsArray,
array,
"Checks if the input JSONB is a JSON array"
);
json_is!(
JsonIsObject,
object,
"Checks if the input JSONB is a JSON object"
);

// Gate the module on test builds: without `#[cfg(test)]` its imports are compiled into
// regular builds, where nothing uses them, and are reported as unused.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::BinaryVector;

    use super::*;

    /// Checks name, return type, signature and per-row results of the `json_is_*` functions.
    ///
    /// NOTE: `JsonIsNull` is not exercised by this test.
    #[test]
    fn test_json_is_functions() {
        let json_is_functions: [&dyn Function; 6] = [
            &JsonIsBool,
            &JsonIsInt,
            &JsonIsFloat,
            &JsonIsString,
            &JsonIsArray,
            &JsonIsObject,
        ];
        let expected_names = [
            "json_is_bool",
            "json_is_int",
            "json_is_float",
            "json_is_string",
            "json_is_array",
            "json_is_object",
        ];
        for (func, expected_name) in json_is_functions.iter().zip(expected_names.iter()) {
            assert_eq!(func.name(), *expected_name);
            assert_eq!(
                func.return_type(&[ConcreteDataType::json_datatype()])
                    .unwrap(),
                ConcreteDataType::boolean_datatype()
            );
            assert_eq!(
                func.signature(),
                Signature::exact(
                    vec![ConcreteDataType::json_datatype()],
                    Volatility::Immutable
                )
            );
        }

        // One input value per JSON type under test.
        let json_strings = [
            r#"true"#,
            r#"1"#,
            r#"1.0"#,
            r#""The pig fly through a castle, and has been attracted by the princess.""#,
            r#"[1, 2]"#,
            r#"{"a": 1}"#,
        ];
        // Row `r` holds the expected output of `json_is_functions[r]` for every entry of
        // `json_strings`, in order.
        let expected_results = [
            [true, false, false, false, false, false],
            [false, true, false, false, false, false],
            // Integers are also floats
            [false, true, true, false, false, false],
            [false, false, false, true, false, false],
            [false, false, false, false, true, false],
            [false, false, false, false, false, true],
        ];

        // Encode the textual JSON as JSONB bytes, the representation the functions consume.
        let jsonbs = json_strings
            .iter()
            .map(|s| {
                let value = jsonb::parse_value(s.as_bytes()).unwrap();
                value.to_vec()
            })
            .collect::<Vec<_>>();
        let json_vector = BinaryVector::from_vec(jsonbs);
        let args: Vec<VectorRef> = vec![Arc::new(json_vector)];

        for (func, expected_result) in json_is_functions.iter().zip(expected_results.iter()) {
            let vector = func.eval(FunctionContext::default(), &args).unwrap();
            assert_eq!(vector.len(), json_strings.len());

            for (i, expected) in expected_result.iter().enumerate() {
                let result = vector.get_ref(i);
                let result = result.as_boolean().unwrap().unwrap();
                assert_eq!(result, *expected);
            }
        }
    }
}
2 changes: 1 addition & 1 deletion src/common/function/src/scalars/json/json_to_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ mod tests {
use super::*;

#[test]
fn test_get_by_path_function() {
fn test_json_to_string_function() {
let json_to_string = JsonToStringFunction;

assert_eq!("json_to_string", json_to_string.name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ use crate::function::{Function, FunctionContext};

/// Parses the `String` into `JSONB`.
#[derive(Clone, Debug, Default)]
pub struct ToJsonFunction;
pub struct ParseJsonFunction;

const NAME: &str = "to_json";
const NAME: &str = "parse_json";

impl Function for ToJsonFunction {
impl Function for ParseJsonFunction {
fn name(&self) -> &str {
NAME
}
Expand Down Expand Up @@ -101,9 +101,9 @@ impl Function for ToJsonFunction {
}
}

impl Display for ToJsonFunction {
impl Display for ParseJsonFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TO_JSON")
write!(f, "PARSE_JSON")
}
}

Expand All @@ -119,17 +119,17 @@ mod tests {

#[test]
fn test_get_by_path_function() {
let to_json = ToJsonFunction;
let parse_json = ParseJsonFunction;

assert_eq!("to_json", to_json.name());
assert_eq!("parse_json", parse_json.name());
assert_eq!(
ConcreteDataType::json_datatype(),
to_json
parse_json
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);

assert!(matches!(to_json.signature(),
assert!(matches!(parse_json.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
Expand All @@ -152,13 +152,12 @@ mod tests {

let json_string_vector = StringVector::from_vec(json_strings.to_vec());
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
let vector = to_json.eval(FunctionContext::default(), &args).unwrap();
let vector = parse_json.eval(FunctionContext::default(), &args).unwrap();

assert_eq!(3, vector.len());
for (i, gt) in jsonbs.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_binary().unwrap().unwrap();
// remove whitespaces
assert_eq!(gt, result);
}
}
Expand Down
56 changes: 0 additions & 56 deletions tests/cases/standalone/common/function/json.sql

This file was deleted.

Loading
Loading