From 32bb26dcdfa5b03ef2d29ff1ddd7f483304d6afa Mon Sep 17 00:00:00 2001 From: "Reilly.tang" Date: Sat, 9 Mar 2024 06:12:32 +0800 Subject: [PATCH] Add `to_unixtime` function (#9077) * [task #5568] add_to_unixtime_function Signed-off-by: tangruilin * Fix clippy --------- Signed-off-by: tangruilin Co-authored-by: Andrew Lamb --- datafusion/functions/src/datetime/mod.rs | 10 ++- .../functions/src/datetime/to_unixtime.rs | 83 +++++++++++++++++++ datafusion/proto/proto/datafusion.proto | 2 + datafusion/proto/src/generated/prost.rs | 1 + .../sqllogictest/test_files/timestamps.slt | 69 +++++++++++++++ 5 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 datafusion/functions/src/datetime/to_unixtime.rs diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index 233e8b2cdbb4..6c9ea1944b1b 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -24,9 +24,11 @@ use datafusion_expr::ScalarUDF; mod common; mod to_date; mod to_timestamp; +mod to_unixtime; // create UDFs make_udf_function!(to_date::ToDateFunc, TO_DATE, to_date); +make_udf_function!(to_unixtime::ToUnixtimeFunc, TO_UNIXTIME, to_unixtime); make_udf_function!(to_timestamp::ToTimestampFunc, TO_TIMESTAMP, to_timestamp); make_udf_function!( to_timestamp::ToTimestampSecondsFunc, @@ -68,7 +70,7 @@ pub mod expr_fn { /// # use datafusion_expr::col; /// # use datafusion::prelude::*; /// # use datafusion_functions::expr_fn::to_date; - /// + /// /// // define a schema. /// let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)])); /// @@ -105,6 +107,11 @@ pub mod expr_fn { super::to_date().call(args) } + #[doc = "converts a string and optional formats to a Unixtime"] + pub fn to_unixtime(args: Vec) -> Expr { + super::to_unixtime().call(args) + } + #[doc = "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`"] pub fn to_timestamp(args: Vec) -> Expr { super::to_timestamp().call(args) @@ -135,6 +142,7 @@ pub mod expr_fn { pub fn functions() -> Vec> { vec![ to_date(), + to_unixtime(), to_timestamp(), to_timestamp_seconds(), to_timestamp_millis(), diff --git a/datafusion/functions/src/datetime/to_unixtime.rs b/datafusion/functions/src/datetime/to_unixtime.rs new file mode 100644 index 000000000000..ed56cdf8d0af --- /dev/null +++ b/datafusion/functions/src/datetime/to_unixtime.rs @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; + +use arrow::datatypes::{DataType, TimeUnit}; + +use crate::datetime::common::*; +use datafusion_common::{exec_err, Result}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; + +use super::to_timestamp::ToTimestampSecondsFunc; + +#[derive(Debug)] +pub(super) struct ToUnixtimeFunc { + signature: Signature, +} + +impl ToUnixtimeFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for ToUnixtimeFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "to_unixtime" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Int64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + if args.is_empty() { + return exec_err!("to_unixtime function requires 1 or more arguments, got 0"); + } + + // validate that any args after the first one are Utf8 + if args.len() > 1 { + validate_data_types(args, "to_unixtime")?; + } + + match args[0].data_type() { + DataType::Int32 | DataType::Int64 | DataType::Null | DataType::Float64 => { + args[0].cast_to(&DataType::Int64, None) + } + DataType::Date64 | DataType::Date32 | DataType::Timestamp(_, None) => args[0] + .cast_to(&DataType::Timestamp(TimeUnit::Second, None), None)? + .cast_to(&DataType::Int64, None), + DataType::Utf8 => ToTimestampSecondsFunc::new() + .invoke(args)? + .cast_to(&DataType::Int64, None), + other => { + exec_err!("Unsupported data type {:?} for function to_unixtime", other) + } + } + } +} diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 6700877f2610..faffe57c071a 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -1,5 +1,6 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -684,6 +685,7 @@ enum ScalarFunction { /// 135 is RegexpLike ToChar = 136; /// 137 was ToDate + /// 138 was ToUnixtime } message ScalarFunctionNode { diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index c12d04f33f9e..4c4b17f1a80b 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2773,6 +2773,7 @@ pub enum ScalarFunction { /// / 135 is RegexpLike /// /// / 137 was ToDate + /// / 138 was ToUnixtime ToChar = 136, } impl ScalarFunction { diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index f0483aec8946..0b8284a45a04 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -2682,3 +2682,72 @@ SELECT to_char(null, '%d-%m-%Y'); statement ok drop table formats; + +########## +## to_unixtime tests +########## + +query I +select to_unixtime('2020-09-08T12:00:00+00:00'); +---- +1599566400 + +query I +select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); +---- +1673638290 + +query I +select to_unixtime('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); +---- +1684295940 + +query I +select to_unixtime(arrow_cast('2020-09-08T12:00:00+00:00', 'Date64')); +---- +1599566400 + +query I +select to_unixtime(arrow_cast('2020-09-08', 'Date32')); +---- +1599523200 + +query I +select to_unixtime(to_timestamp('2020-09-08')); +---- +1599523200 + +query I +select to_unixtime(to_timestamp_seconds('2020-09-08')); +---- +1599523200 + +query I +select to_unixtime(to_timestamp_millis('2020-09-08')); +---- +1599523200 + +query I +select to_unixtime(to_timestamp_micros('2020-09-08')); +---- +1599523200 + +query I +select to_unixtime(to_timestamp_nanos('2020-09-08')); +---- +1599523200 + +query I +select to_unixtime(arrow_cast(1599523200, 'Int32')); +---- +1599523200 + +query I +select to_unixtime(arrow_cast(1599523200, 'Int64')); +---- +1599523200 + +query I +select to_unixtime(arrow_cast(1599523200.414, 'Float64')); +---- +1599523200