diff --git a/README.md b/README.md
index 5d0b096c1de1..30505d7ca132 100644
--- a/README.md
+++ b/README.md
@@ -44,8 +44,9 @@
 DataFusion is an extensible query engine written in [Rust] that
 uses [Apache Arrow] as its in-memory format.
 
-The DataFusion libraries in this repository are used to build data-centric system software. DataFusion also provides the
-following subprojects, which are packaged versions of DataFusion intended for end users.
+This crate provides libraries and binaries for developers building fast and
+feature rich database and analytic systems, customized to particular workloads.
+See [use cases] for examples. The following related subprojects target end users:
 
 - [DataFusion Python](https://github.com/apache/datafusion-python/) offers a Python interface for SQL and DataFrame
   queries.
@@ -54,13 +55,10 @@ following subprojects, which are packaged versions of DataFusion intended for en
 - [DataFusion Comet](https://github.com/apache/datafusion-comet/) is an accelerator for Apache Spark based on
   DataFusion.
 
-The target audience for the DataFusion crates in this repository are
-developers building fast and feature rich database and analytic systems,
-customized to particular workloads. See [use cases] for examples.
-
-DataFusion offers [SQL] and [`Dataframe`] APIs,
-excellent [performance], built-in support for CSV, Parquet, JSON, and Avro,
-extensive customization, and a great community.
+"Out of the box,"
+DataFusion offers [SQL] and [`Dataframe`] APIs, excellent [performance],
+built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and
+a great community.
 
 DataFusion features a full query planner, a columnar, streaming, multi-threaded,
 vectorized execution engine, and partitioned data sources. You can
diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs
index 36254192550c..422fcb5eb3e0 100644
--- a/datafusion/common/src/test_util.rs
+++ b/datafusion/common/src/test_util.rs
@@ -279,8 +279,88 @@ pub fn get_data_dir(
     }
 }
 
+#[macro_export]
+macro_rules! create_array {
+    (Boolean, $values: expr) => {
+        std::sync::Arc::new(arrow::array::BooleanArray::from($values))
+    };
+    (Int8, $values: expr) => {
+        std::sync::Arc::new(arrow::array::Int8Array::from($values))
+    };
+    (Int16, $values: expr) => {
+        std::sync::Arc::new(arrow::array::Int16Array::from($values))
+    };
+    (Int32, $values: expr) => {
+        std::sync::Arc::new(arrow::array::Int32Array::from($values))
+    };
+    (Int64, $values: expr) => {
+        std::sync::Arc::new(arrow::array::Int64Array::from($values))
+    };
+    (UInt8, $values: expr) => {
+        std::sync::Arc::new(arrow::array::UInt8Array::from($values))
+    };
+    (UInt16, $values: expr) => {
+        std::sync::Arc::new(arrow::array::UInt16Array::from($values))
+    };
+    (UInt32, $values: expr) => {
+        std::sync::Arc::new(arrow::array::UInt32Array::from($values))
+    };
+    (UInt64, $values: expr) => {
+        std::sync::Arc::new(arrow::array::UInt64Array::from($values))
+    };
+    (Float16, $values: expr) => {
+        std::sync::Arc::new(arrow::array::Float16Array::from($values))
+    };
+    (Float32, $values: expr) => {
+        std::sync::Arc::new(arrow::array::Float32Array::from($values))
+    };
+    (Float64, $values: expr) => {
+        std::sync::Arc::new(arrow::array::Float64Array::from($values))
+    };
+    (Utf8, $values: expr) => {
+        std::sync::Arc::new(arrow::array::StringArray::from($values))
+    };
+}
+
+/// Creates a record batch from literal slice of values, suitable for rapid
+/// testing and development.
+///
+/// Example:
+/// ```
+/// use datafusion_common::{record_batch, create_array};
+/// let batch = record_batch!(
+///     ("a", Int32, vec![1, 2, 3]),
+///     ("b", Float64, vec![Some(4.0), None, Some(5.0)]),
+///     ("c", Utf8, vec!["alpha", "beta", "gamma"])
+/// );
+/// ```
+#[macro_export]
+macro_rules! record_batch {
+    ($(($name: expr, $type: ident, $values: expr)),*) => {
+        {
+            let schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![
+                $(
+                    arrow_schema::Field::new($name, arrow_schema::DataType::$type, true),
+                )*
+            ]));
+
+            let batch = arrow_array::RecordBatch::try_new(
+                schema,
+                vec![$(
+                    create_array!($type, $values),
+                )*]
+            );
+
+            batch
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
+    use crate::cast::{as_float64_array, as_int32_array, as_string_array};
+    use crate::error::Result;
+
     use super::*;
     use std::env;
 
@@ -333,4 +413,44 @@ mod tests {
         let res = parquet_test_data();
         assert!(PathBuf::from(res).is_dir());
     }
+
+    #[test]
+    fn test_create_record_batch() -> Result<()> {
+        use arrow_array::Array;
+
+        let batch = record_batch!(
+            ("a", Int32, vec![1, 2, 3, 4]),
+            ("b", Float64, vec![Some(4.0), None, Some(5.0), None]),
+            ("c", Utf8, vec!["alpha", "beta", "gamma", "delta"])
+        )?;
+
+        assert_eq!(3, batch.num_columns());
+        assert_eq!(4, batch.num_rows());
+
+        let values: Vec<_> = as_int32_array(batch.column(0))?
+            .values()
+            .iter()
+            .map(|v| v.to_owned())
+            .collect();
+        assert_eq!(values, vec![1, 2, 3, 4]);
+
+        let values: Vec<_> = as_float64_array(batch.column(1))?
+            .values()
+            .iter()
+            .map(|v| v.to_owned())
+            .collect();
+        assert_eq!(values, vec![4.0, 0.0, 5.0, 0.0]);
+
+        let nulls: Vec<_> = as_float64_array(batch.column(1))?
+            .nulls()
+            .unwrap()
+            .iter()
+            .collect();
+        assert_eq!(nulls, vec![true, false, true, false]);
+
+        let values: Vec<_> = as_string_array(batch.column(2))?.iter().flatten().collect();
+        assert_eq!(values, vec!["alpha", "beta", "gamma", "delta"]);
+
+        Ok(())
+    }
 }
diff --git a/datafusion/core/src/bin/print_functions_docs.rs b/datafusion/core/src/bin/print_functions_docs.rs
index 53cfe94ecab3..d9415028c124 100644
--- a/datafusion/core/src/bin/print_functions_docs.rs
+++ b/datafusion/core/src/bin/print_functions_docs.rs
@@ -108,7 +108,7 @@ fn print_docs(
             .collect::<Vec<_>>();
 
         // write out section header
-        let _ = writeln!(docs, "## {} ", doc_section.label);
+        let _ = writeln!(docs, "\n## {} \n", doc_section.label);
 
         if let Some(description) = doc_section.description {
             let _ = writeln!(docs, "{description}");
diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs
index 24a4938e7b2b..3c2d1b0205d6 100644
--- a/datafusion/core/src/datasource/memory.rs
+++ b/datafusion/core/src/datasource/memory.rs
@@ -37,14 +37,14 @@ use crate::physical_planner::create_physical_sort_exprs;
 
 use arrow::datatypes::SchemaRef;
 use arrow::record_batch::RecordBatch;
+use datafusion_catalog::Session;
 use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt};
 use datafusion_execution::TaskContext;
 use datafusion_expr::dml::InsertOp;
+use datafusion_expr::SortExpr;
 use datafusion_physical_plan::metrics::MetricsSet;
 
 use async_trait::async_trait;
-use datafusion_catalog::Session;
-use datafusion_expr::SortExpr;
 use futures::StreamExt;
 use log::debug;
 use parking_lot::Mutex;
@@ -241,7 +241,7 @@ impl TableProvider for MemTable {
                     )
                 })
                 .collect::<Result<Vec<_>>>()?;
-            exec = exec.with_sort_information(file_sort_order);
+            exec = exec.try_with_sort_information(file_sort_order)?;
         }
 
         Ok(Arc::new(exec))
diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
index b0852501415e..64a7514ebd5e 100644
--- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
@@ -395,7 +395,8 @@ async fn run_aggregate_test(input1: Vec<RecordBatch>, group_by_columns: Vec<&str
     let running_source = Arc::new(
         MemoryExec::try_new(&[input1.clone()], schema.clone(), None)
             .unwrap()
-            .with_sort_information(vec![sort_keys]),
+            .try_with_sort_information(vec![sort_keys])
+            .unwrap(),
     );
 
     let aggregate_expr =
diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs
index 0cd702372f7c..a72affc2b079 100644
--- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs
@@ -358,7 +358,8 @@ mod sp_repartition_fuzz_tests {
         let running_source = Arc::new(
             MemoryExec::try_new(&[input1.clone()], schema.clone(), None)
                 .unwrap()
-                .with_sort_information(vec![sort_keys.clone()]),
+                .try_with_sort_information(vec![sort_keys.clone()])
+                .unwrap(),
         );
         let hash_exprs = vec![col("c", &schema).unwrap()];
 
diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs
index b9881c9f23cf..feffb11bf700 100644
--- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs
@@ -647,7 +647,7 @@ async fn run_window_test(
     ];
     let mut exec1 = Arc::new(
         MemoryExec::try_new(&[vec![concat_input_record]], schema.clone(), None)?
-            .with_sort_information(vec![source_sort_keys.clone()]),
+            .try_with_sort_information(vec![source_sort_keys.clone()])?,
     ) as _;
     // Table is ordered according to ORDER BY a, b, c In linear test we use PARTITION BY b, ORDER BY a
     // For WindowAggExec  to produce correct result it need table to be ordered by b,a. Hence add a sort.
@@ -673,7 +673,7 @@ async fn run_window_test(
     )?) as _;
     let exec2 = Arc::new(
         MemoryExec::try_new(&[input1.clone()], schema.clone(), None)?
-            .with_sort_information(vec![source_sort_keys.clone()]),
+            .try_with_sort_information(vec![source_sort_keys.clone()])?,
     );
     let running_window_exec = Arc::new(BoundedWindowAggExec::try_new(
         vec![create_window_expr(
diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs
index ec66df45c7ba..fc2fb9afb5f9 100644
--- a/datafusion/core/tests/memory_limit/mod.rs
+++ b/datafusion/core/tests/memory_limit/mod.rs
@@ -840,7 +840,7 @@ impl TableProvider for SortedTableProvider {
     ) -> Result<Arc<dyn ExecutionPlan>> {
         let mem_exec =
             MemoryExec::try_new(&self.batches, self.schema(), projection.cloned())?
-                .with_sort_information(self.sort_information.clone());
+                .try_with_sort_information(self.sort_information.clone())?;
 
         Ok(Arc::new(mem_exec))
     }
diff --git a/datafusion/expr/src/udf_docs.rs b/datafusion/expr/src/udf_docs.rs
index e8245588d945..e0ce7526036e 100644
--- a/datafusion/expr/src/udf_docs.rs
+++ b/datafusion/expr/src/udf_docs.rs
@@ -155,7 +155,7 @@ impl DocumentationBuilder {
     ///
     /// ```text
     /// <arg_name>:
-    ///   <expression_type> expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+    ///   <expression_type> expression to operate on. Can be a constant, column, or function, and any combination of operators.
     /// ```
     pub fn with_standard_argument(
         self,
diff --git a/datafusion/functions-aggregate/src/approx_distinct.rs b/datafusion/functions-aggregate/src/approx_distinct.rs
index cf8217fe981d..efa9a6d8daad 100644
--- a/datafusion/functions-aggregate/src/approx_distinct.rs
+++ b/datafusion/functions-aggregate/src/approx_distinct.rs
@@ -31,13 +31,17 @@ use datafusion_common::ScalarValue;
 use datafusion_common::{
     downcast_value, internal_err, not_impl_err, DataFusionError, Result,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::format_state_name;
-use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
+};
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
 use std::hash::Hash;
 use std::marker::PhantomData;
+use std::sync::OnceLock;
 make_udaf_expr_and_func!(
     ApproxDistinct,
     approx_distinct,
@@ -303,4 +307,33 @@ impl AggregateUDFImpl for ApproxDistinct {
         };
         Ok(accumulator)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_approx_distinct_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_approx_distinct_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_APPROXIMATE)
+            .with_description(
+                "Returns the approximate number of distinct input values calculated using the HyperLogLog algorithm.",
+            )
+            .with_syntax_example("approx_distinct(expression)")
+            .with_sql_example(r#"```sql
+> SELECT approx_distinct(column_name) FROM table_name;
++-----------------------------------+
+| approx_distinct(column_name)      |
++-----------------------------------+
+| 42                                |
++-----------------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions-aggregate/src/approx_median.rs b/datafusion/functions-aggregate/src/approx_median.rs
index 7a7b12432544..dd5bb8d441ed 100644
--- a/datafusion/functions-aggregate/src/approx_median.rs
+++ b/datafusion/functions-aggregate/src/approx_median.rs
@@ -19,15 +19,19 @@
 
 use std::any::Any;
 use std::fmt::Debug;
+use std::sync::OnceLock;
 
 use arrow::{datatypes::DataType, datatypes::Field};
 use arrow_schema::DataType::{Float64, UInt64};
 
 use datafusion_common::{not_impl_err, plan_err, Result};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::NUMERICS;
 use datafusion_expr::utils::format_state_name;
-use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
+};
 
 use crate::approx_percentile_cont::ApproxPercentileAccumulator;
 
@@ -116,4 +120,33 @@ impl AggregateUDFImpl for ApproxMedian {
             acc_args.exprs[0].data_type(acc_args.schema)?,
         )))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_approx_median_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_approx_median_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_APPROXIMATE)
+            .with_description(
+                "Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(x, 0.5)`.",
+            )
+            .with_syntax_example("approx_median(expression)")
+            .with_sql_example(r#"```sql
+> SELECT approx_median(column_name) FROM table_name;
++-----------------------------------+
+| approx_median(column_name)        |
++-----------------------------------+
+| 23.5                              |
++-----------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs
index 5578aebbf403..b4488d6d9e61 100644
--- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs
+++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs
@@ -17,7 +17,7 @@
 
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::{Array, RecordBatch};
 use arrow::compute::{filter, is_not_null};
@@ -34,12 +34,13 @@ use datafusion_common::{
     downcast_value, internal_err, not_impl_datafusion_err, not_impl_err, plan_err,
     DataFusionError, Result, ScalarValue,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS};
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, ColumnarValue, Expr, Signature, TypeSignature,
-    Volatility,
+    Accumulator, AggregateUDFImpl, ColumnarValue, Documentation, Expr, Signature,
+    TypeSignature, Volatility,
 };
 use datafusion_functions_aggregate_common::tdigest::{
     TDigest, TryIntoF64, DEFAULT_MAX_SIZE,
@@ -268,6 +269,36 @@ impl AggregateUDFImpl for ApproxPercentileCont {
         }
         Ok(arg_types[0].clone())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_approx_percentile_cont_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_approx_percentile_cont_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_APPROXIMATE)
+            .with_description(
+                "Returns the approximate percentile of input values using the t-digest algorithm.",
+            )
+            .with_syntax_example("approx_percentile_cont(expression, percentile, centroids)")
+            .with_sql_example(r#"```sql
+> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
++-------------------------------------------------+
+| approx_percentile_cont(column_name, 0.75, 100)  |
++-------------------------------------------------+
+| 65.0                                            |
++-------------------------------------------------+
+```"#)
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("percentile", "Percentile to compute. Must be a float value between 0 and 1 (inclusive).")
+            .with_argument("centroids", "Number of centroids to use in the t-digest algorithm. _Default is 100_. A higher number results in more accurate approximation but requires more memory.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs
index fee67ba1623d..8cbf9587a75a 100644
--- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs
+++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs
@@ -17,7 +17,7 @@
 
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::{
     array::ArrayRef,
@@ -26,10 +26,13 @@ use arrow::{
 
 use datafusion_common::ScalarValue;
 use datafusion_common::{not_impl_err, plan_err, Result};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::NUMERICS;
 use datafusion_expr::Volatility::Immutable;
-use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, TypeSignature};
+use datafusion_expr::{
+    Accumulator, AggregateUDFImpl, Documentation, Signature, TypeSignature,
+};
 use datafusion_functions_aggregate_common::tdigest::{
     Centroid, TDigest, DEFAULT_MAX_SIZE,
 };
@@ -151,6 +154,37 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight {
     fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
         self.approx_percentile_cont.state_fields(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_approx_percentile_cont_with_weight_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_approx_percentile_cont_with_weight_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_APPROXIMATE)
+            .with_description(
+                "Returns the weighted approximate percentile of input values using the t-digest algorithm.",
+            )
+            .with_syntax_example("approx_percentile_cont_with_weight(expression, weight, percentile)")
+            .with_sql_example(r#"```sql
+> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
++----------------------------------------------------------------------+
+| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
++----------------------------------------------------------------------+
+| 78.5                                                                 |
++----------------------------------------------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("weight", "Expression to use as weight. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("percentile", "Percentile to compute. Must be a float value between 0 and 1 (inclusive).")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs
index 15146fc4a2d8..b44c4e6874ef 100644
--- a/datafusion/functions-aggregate/src/array_agg.rs
+++ b/datafusion/functions-aggregate/src/array_agg.rs
@@ -25,15 +25,16 @@ use datafusion_common::cast::as_list_array;
 use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx};
 use datafusion_common::{exec_err, ScalarValue};
 use datafusion_common::{internal_err, Result};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::format_state_name;
-use datafusion_expr::AggregateUDFImpl;
 use datafusion_expr::{Accumulator, Signature, Volatility};
+use datafusion_expr::{AggregateUDFImpl, Documentation};
 use datafusion_functions_aggregate_common::merge_arrays::merge_ordered_arrays;
 use datafusion_functions_aggregate_common::utils::ordering_fields;
 use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr};
 use std::collections::{HashSet, VecDeque};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 make_udaf_expr_and_func!(
     ArrayAgg,
@@ -142,6 +143,35 @@ impl AggregateUDFImpl for ArrayAgg {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Reversed(array_agg_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_array_agg_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_array_agg_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns an array created from the expression elements. If ordering is required, elements are inserted in the specified order.",
+            )
+            .with_syntax_example("array_agg(expression [ORDER BY expression])")
+            .with_sql_example(r#"```sql
+> SELECT array_agg(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| array_agg(column_name ORDER BY other_column)  |
++-----------------------------------------------+
+| [element1, element2, element3]                |
++-----------------------------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/average.rs b/datafusion/functions-aggregate/src/average.rs
index ddad76a8734b..ad58eecdf949 100644
--- a/datafusion/functions-aggregate/src/average.rs
+++ b/datafusion/functions-aggregate/src/average.rs
@@ -28,12 +28,14 @@ use arrow::datatypes::{
     Float64Type, UInt64Type,
 };
 use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::{avg_return_type, coerce_avg_type};
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::Volatility::Immutable;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature,
+    Accumulator, AggregateUDFImpl, Documentation, EmitTo, GroupsAccumulator,
+    ReversedUDAF, Signature,
 };
 
 use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState;
@@ -45,7 +47,7 @@ use datafusion_functions_aggregate_common::utils::DecimalAverager;
 use log::debug;
 use std::any::Any;
 use std::fmt::Debug;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 make_udaf_expr_and_func!(
     Avg,
@@ -235,6 +237,36 @@ impl AggregateUDFImpl for Avg {
         }
         coerce_avg_type(self.name(), arg_types)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_avg_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_avg_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the average of numeric values in the specified column.",
+            )
+            .with_syntax_example("avg(expression)")
+            .with_sql_example(r#"```sql
+> SELECT avg(column_name) FROM table_name;
++---------------------------+
+| avg(column_name)           |
++---------------------------+
+| 42.75                      |
++---------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("Aliases: ", "`mean`")
+            .build()
+            .unwrap()
+    })
 }
 
 /// An accumulator to compute the average
diff --git a/datafusion/functions-aggregate/src/bool_and_or.rs b/datafusion/functions-aggregate/src/bool_and_or.rs
index 7cc7d9ff7fec..e212ba8d6172 100644
--- a/datafusion/functions-aggregate/src/bool_and_or.rs
+++ b/datafusion/functions-aggregate/src/bool_and_or.rs
@@ -18,6 +18,7 @@
 //! Defines physical expressions that can evaluated at runtime during query execution
 
 use std::any::Any;
+use std::sync::OnceLock;
 
 use arrow::array::ArrayRef;
 use arrow::array::BooleanArray;
@@ -29,10 +30,12 @@ use arrow::datatypes::Field;
 use datafusion_common::internal_err;
 use datafusion_common::{downcast_value, not_impl_err};
 use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, ReversedUDAF,
+    Signature, Volatility,
 };
 
 use datafusion_functions_aggregate_common::aggregate::groups_accumulator::bool_op::BooleanGroupsAccumulator;
@@ -172,6 +175,34 @@ impl AggregateUDFImpl for BoolAnd {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_bool_and_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_bool_and_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns true if all non-null input values are true, otherwise false.",
+            )
+            .with_syntax_example("bool_and(expression)")
+            .with_sql_example(r#"```sql
+> SELECT bool_and(column_name) FROM table_name;
++----------------------------+
+| bool_and(column_name)       |
++----------------------------+
+| true                        |
++----------------------------+
+```"#)
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug, Default)]
@@ -293,6 +324,32 @@ impl AggregateUDFImpl for BoolOr {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_bool_or_doc())
+    }
+}
+
+fn get_bool_or_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns true if any non-null input value is true, otherwise false.",
+            )
+            .with_syntax_example("bool_or(expression)")
+            .with_sql_example(r#"```sql
+> SELECT bool_or(column_name) FROM table_name;
++----------------------------+
+| bool_or(column_name)        |
++----------------------------+
+| true                        |
++----------------------------+
+```"#)
+            .with_standard_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug, Default)]
diff --git a/datafusion/functions-aggregate/src/correlation.rs b/datafusion/functions-aggregate/src/correlation.rs
index 88f01b06d2d9..60be3608e99e 100644
--- a/datafusion/functions-aggregate/src/correlation.rs
+++ b/datafusion/functions-aggregate/src/correlation.rs
@@ -19,7 +19,7 @@
 
 use std::any::Any;
 use std::fmt::Debug;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::compute::{and, filter, is_not_null};
 use arrow::{
@@ -30,11 +30,12 @@ use arrow::{
 use crate::covariance::CovarianceAccumulator;
 use crate::stddev::StddevAccumulator;
 use datafusion_common::{plan_err, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::{
     function::{AccumulatorArgs, StateFieldsArgs},
     type_coercion::aggregates::NUMERICS,
     utils::format_state_name,
-    Accumulator, AggregateUDFImpl, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
 };
 use datafusion_functions_aggregate_common::stats::StatsType;
 
@@ -107,6 +108,35 @@ impl AggregateUDFImpl for Correlation {
             ),
         ])
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_corr_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_corr_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the coefficient of correlation between two numeric values.",
+            )
+            .with_syntax_example("corr(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> SELECT corr(column1, column2) FROM table_name;
++--------------------------------+
+| corr(column1, column2)         |
++--------------------------------+
+| 0.85                           |
++--------------------------------+
+```"#)
+            .with_argument("expression1", "First expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("expression2", "Second expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// An accumulator to compute correlation
diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs
index cc245b3572ec..23dd5b65bf82 100644
--- a/datafusion/functions-aggregate/src/count.rs
+++ b/datafusion/functions-aggregate/src/count.rs
@@ -20,8 +20,9 @@ use datafusion_common::stats::Precision;
 use datafusion_functions_aggregate_common::aggregate::count_distinct::BytesViewDistinctCountAccumulator;
 use datafusion_physical_expr::expressions;
 use std::collections::HashSet;
+use std::fmt::Debug;
 use std::ops::BitAnd;
-use std::{fmt::Debug, sync::Arc};
+use std::sync::{Arc, OnceLock};
 
 use arrow::{
     array::{ArrayRef, AsArray},
@@ -43,10 +44,11 @@ use arrow::{
 use datafusion_common::{
     downcast_value, internal_err, not_impl_err, DataFusionError, Result, ScalarValue,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::{
     function::AccumulatorArgs, utils::format_state_name, Accumulator, AggregateUDFImpl,
-    EmitTo, GroupsAccumulator, Signature, Volatility,
+    Documentation, EmitTo, GroupsAccumulator, Signature, Volatility,
 };
 use datafusion_expr::{Expr, ReversedUDAF, StatisticsArgs, TypeSignature};
 use datafusion_functions_aggregate_common::aggregate::count_distinct::{
@@ -324,6 +326,41 @@ impl AggregateUDFImpl for Count {
         }
         None
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_count_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_count_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the number of non-null values in the specified column. To include null values in the total count, use `count(*)`.",
+            )
+            .with_syntax_example("count(expression)")
+            .with_sql_example(r#"```sql
+> SELECT count(column_name) FROM table_name;
++-----------------------+
+| count(column_name)     |
++-----------------------+
+| 100                   |
++-----------------------+
+
+> SELECT count(*) FROM table_name;
++------------------+
+| count(*)         |
++------------------+
+| 120              |
++------------------+
+```"#)
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/covariance.rs b/datafusion/functions-aggregate/src/covariance.rs
index d0abb079ef15..c599b58ed207 100644
--- a/datafusion/functions-aggregate/src/covariance.rs
+++ b/datafusion/functions-aggregate/src/covariance.rs
@@ -18,6 +18,7 @@
 //! [`CovarianceSample`]: covariance sample aggregations.
 
 use std::fmt::Debug;
+use std::sync::OnceLock;
 
 use arrow::{
     array::{ArrayRef, Float64Array, UInt64Array},
@@ -29,11 +30,12 @@ use datafusion_common::{
     downcast_value, plan_err, unwrap_or_internal_err, DataFusionError, Result,
     ScalarValue,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::{
     function::{AccumulatorArgs, StateFieldsArgs},
     type_coercion::aggregates::NUMERICS,
     utils::format_state_name,
-    Accumulator, AggregateUDFImpl, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
 };
 use datafusion_functions_aggregate_common::stats::StatsType;
 
@@ -124,6 +126,36 @@ impl AggregateUDFImpl for CovarianceSample {
     fn aliases(&self) -> &[String] {
         &self.aliases
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_covar_samp_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_covar_samp_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the sample covariance of a set of number pairs.",
+            )
+            .with_syntax_example("covar_samp(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> SELECT covar_samp(column1, column2) FROM table_name;
++-----------------------------------+
+| covar_samp(column1, column2)      |
++-----------------------------------+
+| 8.25                              |
++-----------------------------------+
+```"#, 
+            )
+            .with_argument("expression1", "First expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("expression2", "Second expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 pub struct CovariancePopulation {
@@ -193,6 +225,34 @@ impl AggregateUDFImpl for CovariancePopulation {
             StatsType::Population,
         )?))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_covar_pop_doc())
+    }
+}
+
+fn get_covar_pop_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the population covariance of a set of number pairs.",
+            )
+            .with_syntax_example("covar_pop(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> SELECT covar_pop(column1, column2) FROM table_name;
++-----------------------------------+
+| covar_pop(column1, column2)       |
++-----------------------------------+
+| 7.63                              |
++-----------------------------------+
+```"#, 
+            )
+            .with_argument("expression1", "First expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("expression2", "Second expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// An accumulator to compute covariance
diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs
index 41ac7875795d..02b8c522823e 100644
--- a/datafusion/functions-aggregate/src/first_last.rs
+++ b/datafusion/functions-aggregate/src/first_last.rs
@@ -19,7 +19,7 @@
 
 use std::any::Any;
 use std::fmt::Debug;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::{ArrayRef, AsArray, BooleanArray};
 use arrow::compute::{self, lexsort_to_indices, SortColumn};
@@ -28,11 +28,12 @@ use datafusion_common::utils::{compare_rows, get_row_at_idx, take_arrays};
 use datafusion_common::{
     arrow_datafusion_err, internal_err, DataFusionError, Result, ScalarValue,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, ArrayFunctionSignature, Expr, ExprFunctionExt,
-    Signature, SortExpr, TypeSignature, Volatility,
+    Accumulator, AggregateUDFImpl, ArrayFunctionSignature, Documentation, Expr,
+    ExprFunctionExt, Signature, SortExpr, TypeSignature, Volatility,
 };
 use datafusion_functions_aggregate_common::utils::get_sort_options;
 use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr};
@@ -165,6 +166,35 @@ impl AggregateUDFImpl for FirstValue {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Reversed(last_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_first_value_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_first_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group.",
+            )
+            .with_syntax_example("first_value(expression [ORDER BY expression])")
+            .with_sql_example(r#"```sql
+> SELECT first_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| first_value(column_name ORDER BY other_column)|
++-----------------------------------------------+
+| first_element                                 |
++-----------------------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
@@ -466,6 +496,33 @@ impl AggregateUDFImpl for LastValue {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Reversed(first_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_last_value_doc())
+    }
+}
+
+fn get_last_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the last element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group.",
+            )
+            .with_syntax_example("last_value(expression [ORDER BY expression])")
+            .with_sql_example(r#"```sql
+> SELECT last_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| last_value(column_name ORDER BY other_column) |
++-----------------------------------------------+
+| last_element                                  |
++-----------------------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/grouping.rs b/datafusion/functions-aggregate/src/grouping.rs
index 6fb7c3800f4e..09e9b90b2e6d 100644
--- a/datafusion/functions-aggregate/src/grouping.rs
+++ b/datafusion/functions-aggregate/src/grouping.rs
@@ -19,14 +19,18 @@
 
 use std::any::Any;
 use std::fmt;
+use std::sync::OnceLock;
 
 use arrow::datatypes::DataType;
 use arrow::datatypes::Field;
 use datafusion_common::{not_impl_err, Result};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::AccumulatorArgs;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::utils::format_state_name;
-use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
+};
 
 make_udaf_expr_and_func!(
     Grouping,
@@ -94,4 +98,37 @@ impl AggregateUDFImpl for Grouping {
             "physical plan is not yet implemented for GROUPING aggregate function"
         )
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_grouping_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_grouping_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns 1 if the data is aggregated across the specified column, or 0 if it is not aggregated in the result set.",
+            )
+            .with_syntax_example("grouping(expression)")
+            .with_sql_example(r#"```sql
+> SELECT column_name, GROUPING(column_name) AS group_column
+  FROM table_name
+  GROUP BY GROUPING SETS ((column_name), ());
++-------------+-------------+
+| column_name | group_column |
++-------------+-------------+
+| value1      | 0           |
+| value2      | 0           |
+| NULL        | 1           |
++-------------+-------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to evaluate whether data is aggregated across the specified column. Can be a constant, column, or function.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions-aggregate/src/median.rs b/datafusion/functions-aggregate/src/median.rs
index 7dd0de14c3c0..8eb17db1eca9 100644
--- a/datafusion/functions-aggregate/src/median.rs
+++ b/datafusion/functions-aggregate/src/median.rs
@@ -16,8 +16,8 @@
 // under the License.
 
 use std::collections::HashSet;
-use std::fmt::Formatter;
-use std::{fmt::Debug, sync::Arc};
+use std::fmt::{Debug, Formatter};
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::{downcast_integer, ArrowNumericType};
 use arrow::{
@@ -33,10 +33,11 @@ use arrow::array::ArrowNativeTypeOp;
 use arrow::datatypes::ArrowNativeType;
 
 use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::{
     function::AccumulatorArgs, utils::format_state_name, Accumulator, AggregateUDFImpl,
-    Signature, Volatility,
+    Documentation, Signature, Volatility,
 };
 use datafusion_functions_aggregate_common::utils::Hashable;
 
@@ -152,6 +153,35 @@ impl AggregateUDFImpl for Median {
     fn aliases(&self) -> &[String] {
         &[]
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_median_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_median_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the median value in the specified column.",
+            )
+            .with_syntax_example("median(expression)")
+            .with_sql_example(r#"```sql
+> SELECT median(column_name) FROM table_name;
++----------------------+
+| median(column_name)   |
++----------------------+
+| 45.5                 |
++----------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// The median accumulator accumulates the raw input values
diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs
index 26ba97f505fd..2f7954a8ee02 100644
--- a/datafusion/functions-aggregate/src/min_max.rs
+++ b/datafusion/functions-aggregate/src/min_max.rs
@@ -40,6 +40,7 @@ use datafusion_common::stats::Precision;
 use datafusion_common::{
     downcast_value, exec_err, internal_err, ColumnStatistics, DataFusionError, Result,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
 use datafusion_physical_expr::expressions;
 use std::fmt::Debug;
@@ -54,11 +55,13 @@ use arrow::datatypes::{
 use crate::min_max::min_max_bytes::MinMaxBytesAccumulator;
 use datafusion_common::ScalarValue;
 use datafusion_expr::{
-    function::AccumulatorArgs, Accumulator, AggregateUDFImpl, Signature, Volatility,
+    function::AccumulatorArgs, Accumulator, AggregateUDFImpl, Documentation, Signature,
+    Volatility,
 };
 use datafusion_expr::{GroupsAccumulator, StatisticsArgs};
 use half::f16;
 use std::ops::Deref;
+use std::sync::OnceLock;
 
 fn get_min_max_result_type(input_types: &[DataType]) -> Result<Vec<DataType>> {
     // make sure that the input types only has one element.
@@ -330,6 +333,35 @@ impl AggregateUDFImpl for Max {
     fn value_from_stats(&self, statistics_args: &StatisticsArgs) -> Option<ScalarValue> {
         self.value_from_statistics(statistics_args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_max_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_max_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the maximum value in the specified column.",
+            )
+            .with_syntax_example("max(expression)")
+            .with_sql_example(r#"```sql
+> SELECT max(column_name) FROM table_name;
++----------------------+
+| max(column_name)      |
++----------------------+
+| 150                  |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 // Statically-typed version of min/max(array) -> ScalarValue for string types
@@ -1134,7 +1166,35 @@ impl AggregateUDFImpl for Min {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_min_doc())
+    }
+}
+
+fn get_min_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the minimum value in the specified column.",
+            )
+            .with_syntax_example("min(expression)")
+            .with_sql_example(r#"```sql
+> SELECT min(column_name) FROM table_name;
++----------------------+
+| min(column_name)      |
++----------------------+
+| 12                   |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
+
 /// An accumulator to compute the minimum value
 #[derive(Debug)]
 pub struct MinAccumulator {
diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs
index bbfe56914c91..6d8cea8f0531 100644
--- a/datafusion/functions-aggregate/src/nth_value.rs
+++ b/datafusion/functions-aggregate/src/nth_value.rs
@@ -20,18 +20,19 @@
 
 use std::any::Any;
 use std::collections::VecDeque;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::{new_empty_array, ArrayRef, AsArray, StructArray};
 use arrow_schema::{DataType, Field, Fields};
 
 use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx};
 use datafusion_common::{exec_err, internal_err, not_impl_err, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    lit, Accumulator, AggregateUDFImpl, ExprFunctionExt, ReversedUDAF, Signature,
-    SortExpr, Volatility,
+    lit, Accumulator, AggregateUDFImpl, Documentation, ExprFunctionExt, ReversedUDAF,
+    Signature, SortExpr, Volatility,
 };
 use datafusion_functions_aggregate_common::merge_arrays::merge_ordered_arrays;
 use datafusion_functions_aggregate_common::utils::ordering_fields;
@@ -161,6 +162,40 @@ impl AggregateUDFImpl for NthValueAgg {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Reversed(nth_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_nth_value_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_nth_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the nth value in a group of values.",
+            )
+            .with_syntax_example("nth_value(expression, n ORDER BY expression)")
+            .with_sql_example(r#"```sql
+> SELECT dept_id, salary, NTH_VALUE(salary, 2) OVER (PARTITION BY dept_id ORDER BY salary ASC) AS second_salary_by_dept
+  FROM employee;
++---------+--------+-------------------------+
+| dept_id | salary | second_salary_by_dept   |
++---------+--------+-------------------------+
+| 1       | 30000  | NULL                    |
+| 1       | 40000  | 40000                   |
+| 1       | 50000  | 40000                   |
+| 2       | 35000  | NULL                    |
+| 2       | 45000  | 45000                   |
++---------+--------+-------------------------+
+```"#)
+            .with_standard_argument("expression", "The column or expression to retrieve the nth value from.")
+            .with_argument("n", "The position (nth) of the value to retrieve, based on the ordering.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs
index a25ab5e31991..9f9da0c585fc 100644
--- a/datafusion/functions-aggregate/src/stddev.rs
+++ b/datafusion/functions-aggregate/src/stddev.rs
@@ -19,17 +19,19 @@
 
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::Float64Array;
 use arrow::{array::ArrayRef, datatypes::DataType, datatypes::Field};
 
 use datafusion_common::{internal_err, not_impl_err, Result};
 use datafusion_common::{plan_err, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, Signature,
+    Volatility,
 };
 use datafusion_functions_aggregate_common::stats::StatsType;
 
@@ -132,6 +134,35 @@ impl AggregateUDFImpl for Stddev {
     ) -> Result<Box<dyn GroupsAccumulator>> {
         Ok(Box::new(StddevGroupsAccumulator::new(StatsType::Sample)))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_stddev_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_stddev_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the standard deviation of a set of numbers.",
+            )
+            .with_syntax_example("stddev(expression)")
+            .with_sql_example(r#"```sql
+> SELECT stddev(column_name) FROM table_name;
++----------------------+
+| stddev(column_name)   |
++----------------------+
+| 12.34                |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 make_udaf_expr_and_func!(
@@ -228,6 +259,33 @@ impl AggregateUDFImpl for StddevPop {
             StatsType::Population,
         )))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_stddev_pop_doc())
+    }
+}
+
+fn get_stddev_pop_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the population standard deviation of a set of numbers.",
+            )
+            .with_syntax_example("stddev_pop(expression)")
+            .with_sql_example(r#"```sql
+> SELECT stddev_pop(column_name) FROM table_name;
++--------------------------+
+| stddev_pop(column_name)   |
++--------------------------+
+| 10.56                    |
++--------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// An accumulator to compute the average
diff --git a/datafusion/functions-aggregate/src/string_agg.rs b/datafusion/functions-aggregate/src/string_agg.rs
index a7e9a37e23ad..66fc19910696 100644
--- a/datafusion/functions-aggregate/src/string_agg.rs
+++ b/datafusion/functions-aggregate/src/string_agg.rs
@@ -22,12 +22,14 @@ use arrow_schema::DataType;
 use datafusion_common::cast::as_generic_string_array;
 use datafusion_common::Result;
 use datafusion_common::{not_impl_err, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::AccumulatorArgs;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, Signature, TypeSignature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, Signature, TypeSignature, Volatility,
 };
 use datafusion_physical_expr::expressions::Literal;
 use std::any::Any;
+use std::sync::OnceLock;
 
 make_udaf_expr_and_func!(
     StringAgg,
@@ -98,6 +100,37 @@ impl AggregateUDFImpl for StringAgg {
 
         not_impl_err!("expect literal")
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_string_agg_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_string_agg_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Concatenates the values of string expressions and places separator values between them."
+            )
+            .with_syntax_example("string_agg(expression, delimiter)")
+            .with_sql_example(r#"```sql
+> SELECT string_agg(name, ', ') AS names_list
+  FROM employee;
++--------------------------+
+| names_list               |
++--------------------------+
+| Alice, Bob, Charlie      |
++--------------------------+
+```"#, 
+            )
+            .with_argument("expression", "The string expression to concatenate. Can be a column or any valid string expression.")
+            .with_argument("delimiter", "A literal string used as a separator between the concatenated values.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs
index 7e40c1bd17a8..91e777dd2a87 100644
--- a/datafusion/functions-aggregate/src/sum.rs
+++ b/datafusion/functions-aggregate/src/sum.rs
@@ -21,6 +21,7 @@ use ahash::RandomState;
 use datafusion_expr::utils::AggregateOrderSensitivity;
 use std::any::Any;
 use std::collections::HashSet;
+use std::sync::OnceLock;
 
 use arrow::array::Array;
 use arrow::array::ArrowNativeTypeOp;
@@ -33,11 +34,13 @@ use arrow::datatypes::{
 };
 use arrow::{array::ArrayRef, datatypes::Field};
 use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::AccumulatorArgs;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, ReversedUDAF,
+    Signature, Volatility,
 };
 use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
 use datafusion_functions_aggregate_common::utils::Hashable;
@@ -233,6 +236,35 @@ impl AggregateUDFImpl for Sum {
     fn order_sensitivity(&self) -> AggregateOrderSensitivity {
         AggregateOrderSensitivity::Insensitive
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_sum_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_sum_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the sum of all values in the specified column.",
+            )
+            .with_syntax_example("sum(expression)")
+            .with_sql_example(r#"```sql
+> SELECT sum(column_name) FROM table_name;
++-----------------------+
+| sum(column_name)       |
++-----------------------+
+| 12345                 |
++-----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// This accumulator computes SUM incrementally
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index a3d114221d3f..2ffe93a0e567 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -117,6 +117,11 @@ harness = false
 name = "make_date"
 required-features = ["datetime_expressions"]
 
+[[bench]]
+harness = false
+name = "iszero"
+required-features = ["math_expressions"]
+
 [[bench]]
 harness = false
 name = "nullif"
diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs
new file mode 100644
index 000000000000..3348d172e1f2
--- /dev/null
+++ b/datafusion/functions/benches/iszero.rs
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::{
+    datatypes::{Float32Type, Float64Type},
+    util::bench_util::create_primitive_array,
+};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ColumnarValue;
+use datafusion_functions::math::iszero;
+use std::sync::Arc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let iszero = iszero();
+    for size in [1024, 4096, 8192] {
+        let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2));
+        let f32_args = vec![ColumnarValue::Array(f32_array)];
+        c.bench_function(&format!("iszero f32 array: {}", size), |b| {
+            b.iter(|| black_box(iszero.invoke(&f32_args).unwrap()))
+        });
+        let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
+        let f64_args = vec![ColumnarValue::Array(f64_array)];
+        c.bench_function(&format!("iszero f64 array: {}", size), |b| {
+            b.iter(|| black_box(iszero.invoke(&f64_args).unwrap()))
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs
index a1b74228a503..a3e3feaa17e3 100644
--- a/datafusion/functions/src/core/arrow_cast.rs
+++ b/datafusion/functions/src/core/arrow_cast.rs
@@ -17,17 +17,19 @@
 
 //! [`ArrowCastFunc`]: Implementation of the `arrow_cast`
 
-use std::any::Any;
-
 use arrow::datatypes::DataType;
 use datafusion_common::{
     arrow_datafusion_err, internal_err, plan_datafusion_err, plan_err, DataFusionError,
     ExprSchema, Result, ScalarValue,
 };
+use std::any::Any;
+use std::sync::OnceLock;
 
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
 use datafusion_expr::{
-    ColumnarValue, Expr, ExprSchemable, ScalarUDFImpl, Signature, Volatility,
+    ColumnarValue, Documentation, Expr, ExprSchemable, ScalarUDFImpl, Signature,
+    Volatility,
 };
 
 /// Implements casting to arbitrary arrow types (rather than SQL types)
@@ -131,6 +133,39 @@ impl ScalarUDFImpl for ArrowCastFunc {
         // return the newly written argument to DataFusion
         Ok(ExprSimplifyResult::Simplified(new_expr))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_arrow_cast_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_arrow_cast_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_OTHER)
+            .with_description("Casts a value to a specific Arrow data type.")
+            .with_syntax_example("arrow_cast(expression, datatype)")
+            .with_sql_example(
+                r#"```sql
+> select arrow_cast(-5, 'Int8') as a,
+  arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b,
+  arrow_cast('bar', 'LargeUtf8') as c,
+  arrow_cast('2023-01-02T12:53:02', 'Timestamp(Microsecond, Some("+08:00"))') as d
+  ;
++----+-----+-----+---------------------------+
+| a  | b   | c   | d                         |
++----+-----+-----+---------------------------+
+| -5 | foo | bar | 2023-01-02T12:53:02+08:00 |
++----+-----+-----+---------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to cast. The expression can be a constant, column, or function, and any combination of operators.")
+            .with_argument("datatype", "[Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) name to cast to, as a string. The format is the same as that returned by [`arrow_typeof`]")
+            .build()
+            .unwrap()
+    })
 }
 
 /// Returns the requested type from the arguments
diff --git a/datafusion/functions/src/core/arrowtypeof.rs b/datafusion/functions/src/core/arrowtypeof.rs
index cc5e7e619bd8..a425aff6caad 100644
--- a/datafusion/functions/src/core/arrowtypeof.rs
+++ b/datafusion/functions/src/core/arrowtypeof.rs
@@ -17,9 +17,11 @@
 
 use arrow::datatypes::DataType;
 use datafusion_common::{exec_err, Result, ScalarValue};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
+use datafusion_expr::{ColumnarValue, Documentation};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
+use std::sync::OnceLock;
 
 #[derive(Debug)]
 pub struct ArrowTypeOfFunc {
@@ -69,4 +71,35 @@ impl ScalarUDFImpl for ArrowTypeOfFunc {
             "{input_data_type}"
         ))))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_arrowtypeof_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_arrowtypeof_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_OTHER)
+            .with_description(
+                "Returns the name of the underlying [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) of the expression.",
+            )
+            .with_syntax_example("arrow_typeof(expression)")
+            .with_sql_example(
+                r#"```sql
+> select arrow_typeof('foo'), arrow_typeof(1);
++---------------------------+------------------------+
+| arrow_typeof(Utf8("foo")) | arrow_typeof(Int64(1)) |
++---------------------------+------------------------+
+| Utf8                      | Int64                  |
++---------------------------+------------------------+
+```
+"#,
+            )
+            .with_argument("expression", "Expression to evaluate. The expression can be a constant, column, or function, and any combination of operators.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs
index d8ff44798f8a..15cd733a8cd6 100644
--- a/datafusion/functions/src/core/coalesce.rs
+++ b/datafusion/functions/src/core/coalesce.rs
@@ -47,23 +47,6 @@ impl CoalesceFunc {
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_coalesce_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder()
-            .with_doc_section(DOC_SECTION_CONDITIONAL)
-            .with_description("Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values.")
-            .with_syntax_example("coalesce(expression1[, ..., expression_n])")
-            .with_argument(
-                "expression1, expression_n",
-                "Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary."
-            )
-            .build()
-            .unwrap()
-    })
-}
-
 impl ScalarUDFImpl for CoalesceFunc {
     fn as_any(&self) -> &dyn Any {
         self
@@ -164,6 +147,32 @@ impl ScalarUDFImpl for CoalesceFunc {
     }
 }
 
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_coalesce_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_CONDITIONAL)
+            .with_description("Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values.")
+            .with_syntax_example("coalesce(expression1[, ..., expression_n])")
+            .with_sql_example(r#"```sql
+> select coalesce(null, null, 'datafusion');
++----------------------------------------+
+| coalesce(NULL,NULL,Utf8("datafusion")) |
++----------------------------------------+
+| datafusion                             |
++----------------------------------------+
+```"#,
+            )
+            .with_argument(
+                "expression1, expression_n",
+                "Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary."
+            )
+            .build()
+            .unwrap()
+    })
+}
+
 #[cfg(test)]
 mod test {
     use arrow::datatypes::DataType;
diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs
index a51f895c5084..c0af4d35966b 100644
--- a/datafusion/functions/src/core/getfield.rs
+++ b/datafusion/functions/src/core/getfield.rs
@@ -23,10 +23,11 @@ use datafusion_common::cast::{as_map_array, as_struct_array};
 use datafusion_common::{
     exec_err, plan_datafusion_err, plan_err, ExprSchema, Result, ScalarValue,
 };
-use datafusion_expr::{ColumnarValue, Expr, ExprSchemable};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
+use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 #[derive(Debug)]
 pub struct GetFieldFunc {
@@ -133,7 +134,7 @@ impl ScalarUDFImpl for GetFieldFunc {
                     DataType::Struct(fields) if fields.len() == 2 => {
                         // Arrow's MapArray is essentially a ListArray of structs with two columns. They are
                         // often named "key", and "value", but we don't require any specific naming here;
-                        // instead, we assume that the second columnis the "value" column both here and in
+                        // instead, we assume that the second column is the "value" column both here and in
                         // execution.
                         let value_field = fields.get(1).expect("fields should have exactly two members");
                         Ok(value_field.data_type().clone())
@@ -155,7 +156,7 @@ impl ScalarUDFImpl for GetFieldFunc {
                 "Only UTF8 strings are valid as an indexed field in a struct"
             ),
             (DataType::Null, _) => Ok(DataType::Null),
-            (other, _) => plan_err!("The expression to get an indexed field is only valid for `List`, `Struct`, `Map` or `Null` types, got {other}"),
+            (other, _) => plan_err!("The expression to get an indexed field is only valid for `Struct`, `Map` or `Null` types, got {other}"),
         }
     }
 
@@ -190,7 +191,7 @@ impl ScalarUDFImpl for GetFieldFunc {
                 let keys = arrow::compute::kernels::cmp::eq(&key_scalar, map_array.keys())?;
 
                 // note that this array has more entries than the expected output/input size
-                // because maparray is flatten
+                // because map_array is flattened
                 let original_data =  map_array.entries().column(1).to_data();
                 let capacity = Capacities::Array(original_data.len());
                 let mut mutable =
@@ -205,7 +206,7 @@ impl ScalarUDFImpl for GetFieldFunc {
                                         keys.slice(start, end-start).
                                         iter().enumerate().
                                         find(|(_, t)| t.unwrap());
-                    if maybe_matched.is_none(){
+                    if maybe_matched.is_none() {
                         mutable.extend_nulls(1);
                         continue
                     }
@@ -224,14 +225,67 @@ impl ScalarUDFImpl for GetFieldFunc {
                 }
             }
             (DataType::Struct(_), name) => exec_err!(
-                "get indexed field is only possible on struct with utf8 indexes. \
-                             Tried with {name:?} index"
+                "get_field is only possible on struct with utf8 indexes. \
+                             Received with {name:?} index"
             ),
             (DataType::Null, _) => Ok(ColumnarValue::Scalar(ScalarValue::Null)),
             (dt, name) => exec_err!(
-                "get indexed field is only possible on lists with int64 indexes or struct \
-                                         with utf8 indexes. Tried {dt:?} with {name:?} index"
+                "get_field is only possible on maps with utf8 indexes or struct \
+                                         with utf8 indexes. Received {dt:?} with {name:?} index"
             ),
         }
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_getfield_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_getfield_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_OTHER)
+            .with_description(r#"Returns a field within a map or a struct with the given key.
+Note: most users invoke `get_field` indirectly via field access
+syntax such as `my_struct_col['field_name']` which results in a call to
+`get_field(my_struct_col, 'field_name')`."#)
+            .with_syntax_example("get_field(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> create table t (idx varchar, v varchar) as values ('data','fusion'), ('apache', 'arrow');
+> select struct(idx, v) from t as c;
++-------------------------+
+| struct(c.idx,c.v)       |
++-------------------------+
+| {c0: data, c1: fusion}  |
+| {c0: apache, c1: arrow} |
++-------------------------+
+> select get_field((select struct(idx, v) from t), 'c0');
++-----------------------+
+| struct(t.idx,t.v)[c0] |
++-----------------------+
+| data                  |
+| apache                |
++-----------------------+
+> select get_field((select struct(idx, v) from t), 'c1');
++-----------------------+
+| struct(t.idx,t.v)[c1] |
++-----------------------+
+| fusion                |
+| arrow                 |
++-----------------------+
+```
+ "#)
+            .with_argument(
+                "expression1",
+                "The map or struct to retrieve a field for."
+            )
+            .with_argument(
+                "expression2",
+                "The field name in the map or struct to retrieve data for. Must evaluate to a string."
+            )
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs
index 85c332745355..342f99274aca 100644
--- a/datafusion/functions/src/core/named_struct.rs
+++ b/datafusion/functions/src/core/named_struct.rs
@@ -18,11 +18,12 @@
 use arrow::array::StructArray;
 use arrow::datatypes::{DataType, Field, Fields};
 use datafusion_common::{exec_err, internal_err, Result, ScalarValue};
-use datafusion_expr::{ColumnarValue, Expr, ExprSchemable};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRUCT;
+use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use hashbrown::HashSet;
 use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 /// put values in a struct array.
 fn named_struct_expr(args: &[ColumnarValue]) -> Result<ColumnarValue> {
@@ -161,4 +162,46 @@ impl ScalarUDFImpl for NamedStructFunc {
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         named_struct_expr(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_named_struct_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_named_struct_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STRUCT)
+            .with_description("Returns an Arrow struct using the specified name and input expressions pairs.")
+            .with_syntax_example("named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input])")
+            .with_sql_example(r#"
+For example, this query converts two columns `a` and `b` to a single column with
+a struct type of fields `field_a` and `field_b`:
+```sql
+> select * from t;
++---+---+
+| a | b |
++---+---+
+| 1 | 2 |
+| 3 | 4 |
++---+---+
+> select named_struct('field_a', a, 'field_b', b) from t;
++-------------------------------------------------------+
+| named_struct(Utf8("field_a"),t.a,Utf8("field_b"),t.b) |
++-------------------------------------------------------+
+| {field_a: 1, field_b: 2}                              |
+| {field_a: 3, field_b: 4}                              |
++-------------------------------------------------------+
+```
+"#)
+            .with_argument(
+                "expression_n_name",
+                "Name of the column field. Must be a constant string."
+            )
+            .with_argument("expression_n_input", "Expression to include in the output struct. Can be a constant, column, or function, and any combination of arithmetic or string operators.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs
index 6fcfbd36416e..f96ee1ea7a12 100644
--- a/datafusion/functions/src/core/nullif.rs
+++ b/datafusion/functions/src/core/nullif.rs
@@ -17,13 +17,15 @@
 
 use arrow::datatypes::DataType;
 use datafusion_common::{exec_err, Result};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ColumnarValue, Documentation};
 
 use arrow::compute::kernels::cmp::eq;
 use arrow::compute::kernels::nullif::nullif;
 use datafusion_common::ScalarValue;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
+use std::sync::OnceLock;
 
 #[derive(Debug)]
 pub struct NullIfFunc {
@@ -93,6 +95,47 @@ impl ScalarUDFImpl for NullIfFunc {
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         nullif_func(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_nullif_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_nullif_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_CONDITIONAL)
+            .with_description("Returns _null_ if _expression1_ equals _expression2_; otherwise it returns _expression1_.
+This can be used to perform the inverse operation of [`coalesce`](#coalesce).")
+            .with_syntax_example("nullif(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> select nullif('datafusion', 'data');
++-----------------------------------------+
+| nullif(Utf8("datafusion"),Utf8("data")) |
++-----------------------------------------+
+| datafusion                              |
++-----------------------------------------+
+> select nullif('datafusion', 'datafusion');
++-----------------------------------------------+
+| nullif(Utf8("datafusion"),Utf8("datafusion")) |
++-----------------------------------------------+
+|                                               |
++-----------------------------------------------+
+```
+"#)
+            .with_argument(
+                "expression1",
+                "Expression to compare and return if equal to expression2. Can be a constant, column, or function, and any combination of operators."
+            )
+            .with_argument(
+                "expression2",
+                "Expression to compare to expression1. Can be a constant, column, or function, and any combination of operators."
+            )
+            .build()
+            .unwrap()
+    })
 }
 
 /// Implements NULLIF(expr1, expr2)
diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs
index a09224acefcd..16438e1b6254 100644
--- a/datafusion/functions/src/core/nvl.rs
+++ b/datafusion/functions/src/core/nvl.rs
@@ -20,8 +20,11 @@ use arrow::compute::is_not_null;
 use arrow::compute::kernels::zip::zip;
 use arrow::datatypes::DataType;
 use datafusion_common::{internal_err, Result};
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
-use std::sync::Arc;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
+use std::sync::{Arc, OnceLock};
 
 #[derive(Debug)]
 pub struct NVLFunc {
@@ -91,6 +94,46 @@ impl ScalarUDFImpl for NVLFunc {
     fn aliases(&self) -> &[String] {
         &self.aliases
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_nvl_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_nvl_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_CONDITIONAL)
+            .with_description("Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_.")
+            .with_syntax_example("nvl(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> select nvl(null, 'a');
++---------------------+
+| nvl(NULL,Utf8("a")) |
++---------------------+
+| a                   |
++---------------------+\
+> select nvl('b', 'a');
++--------------------------+
+| nvl(Utf8("b"),Utf8("a")) |
++--------------------------+
+| b                        |
++--------------------------+
+```
+"#)
+            .with_argument(
+                "expression1",
+                "Expression to return if not null. Can be a constant, column, or function, and any combination of operators."
+            )
+            .with_argument(
+                "expression2",
+                "Expression to return if expr1 is null. Can be a constant, column, or function, and any combination of operators."
+            )
+            .build()
+            .unwrap()
+    })
 }
 
 fn nvl_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
diff --git a/datafusion/functions/src/core/nvl2.rs b/datafusion/functions/src/core/nvl2.rs
index 1144dc0fb7c5..cfcdb4480787 100644
--- a/datafusion/functions/src/core/nvl2.rs
+++ b/datafusion/functions/src/core/nvl2.rs
@@ -20,11 +20,12 @@ use arrow::compute::is_not_null;
 use arrow::compute::kernels::zip::zip;
 use arrow::datatypes::DataType;
 use datafusion_common::{exec_err, internal_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL;
 use datafusion_expr::{
-    type_coercion::binary::comparison_coercion, ColumnarValue, ScalarUDFImpl, Signature,
-    Volatility,
+    type_coercion::binary::comparison_coercion, ColumnarValue, Documentation,
+    ScalarUDFImpl, Signature, Volatility,
 };
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 #[derive(Debug)]
 pub struct NVL2Func {
@@ -90,6 +91,50 @@ impl ScalarUDFImpl for NVL2Func {
         )?;
         Ok(vec![new_type; arg_types.len()])
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_nvl2_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_nvl2_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_CONDITIONAL)
+            .with_description("Returns _expression2_ if _expression1_ is not NULL; otherwise it returns _expression3_.")
+            .with_syntax_example("nvl2(expression1, expression2, expression3)")
+            .with_sql_example(r#"```sql
+> select nvl2(null, 'a', 'b');
++--------------------------------+
+| nvl2(NULL,Utf8("a"),Utf8("b")) |
++--------------------------------+
+| b                              |
++--------------------------------+
+> select nvl2('data', 'a', 'b');
++----------------------------------------+
+| nvl2(Utf8("data"),Utf8("a"),Utf8("b")) |
++----------------------------------------+
+| a                                      |
++----------------------------------------+
+```
+"#)
+            .with_argument(
+                "expression1",
+                "Expression to test for null. Can be a constant, column, or function, and any combination of operators."
+            )
+            .with_argument(
+                "expression2",
+                "Expression to return if expr1 is not null. Can be a constant, column, or function, and any combination of operators."
+            )
+            .with_argument(
+                "expression3",
+                "Expression to return if expr1 is null. Can be a constant, column, or function, and any combination of operators."
+            )
+            .build()
+            .unwrap()
+    })
 }
 
 fn nvl2_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs
index 5eea7dd48de8..75d1d4eca698 100644
--- a/datafusion/functions/src/core/struct.rs
+++ b/datafusion/functions/src/core/struct.rs
@@ -18,10 +18,11 @@
 use arrow::array::{ArrayRef, StructArray};
 use arrow::datatypes::{DataType, Field, Fields};
 use datafusion_common::{exec_err, Result};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRUCT;
+use datafusion_expr::{ColumnarValue, Documentation};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 fn array_struct(args: &[ArrayRef]) -> Result<ArrayRef> {
     // do not accept 0 arguments.
@@ -103,4 +104,56 @@ impl ScalarUDFImpl for StructFunc {
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         struct_expr(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_struct_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_struct_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STRUCT)
+            .with_description("Returns an Arrow struct using the specified input expressions optionally named.
+Fields in the returned struct use the optional name or the `cN` naming convention.
+For example: `c0`, `c1`, `c2`, etc.")
+            .with_syntax_example("struct(expression1[, ..., expression_n])")
+            .with_sql_example(r#"For example, this query converts two columns `a` and `b` to a single column with
+a struct type of fields `field_a` and `c1`:
+```sql
+> select * from t;
++---+---+
+| a | b |
++---+---+
+| 1 | 2 |
+| 3 | 4 |
++---+---+
+
+-- use default names `c0`, `c1`
+> select struct(a, b) from t;
++-----------------+
+| struct(t.a,t.b) |
++-----------------+
+| {c0: 1, c1: 2}  |
+| {c0: 3, c1: 4}  |
++-----------------+
+
+-- name the first field `field_a`
+select struct(a as field_a, b) from t;
++--------------------------------------------------+
+| named_struct(Utf8("field_a"),t.a,Utf8("c1"),t.b) |
++--------------------------------------------------+
+| {field_a: 1, c1: 2}                              |
+| {field_a: 3, c1: 4}                              |
++--------------------------------------------------+
+```
+"#)
+            .with_argument(
+                "expression1, expression_n",
+                "Expression to include in the output struct. Can be a constant, column, or function, any combination of arithmetic or string operators.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions/src/core/version.rs b/datafusion/functions/src/core/version.rs
index 212349e68981..f726122c649a 100644
--- a/datafusion/functions/src/core/version.rs
+++ b/datafusion/functions/src/core/version.rs
@@ -17,11 +17,14 @@
 
 //! [`VersionFunc`]: Implementation of the `version` function.
 
-use std::any::Any;
-
 use arrow::datatypes::DataType;
 use datafusion_common::{not_impl_err, plan_err, Result, ScalarValue};
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
+use std::any::Any;
+use std::sync::OnceLock;
 
 #[derive(Debug)]
 pub struct VersionFunc {
@@ -78,6 +81,33 @@ impl ScalarUDFImpl for VersionFunc {
         );
         Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(version))))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_version_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_version_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_OTHER)
+            .with_description("Returns the version of DataFusion.")
+            .with_syntax_example("version()")
+            .with_sql_example(
+                r#"```sql
+> select version();
++--------------------------------------------+
+| version()                                  |
++--------------------------------------------+
+| Apache DataFusion 42.0.0, aarch64 on macos |
++--------------------------------------------+
+```"#,
+            )
+            .build()
+            .unwrap()
+    })
 }
 
 #[cfg(test)]
diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs
index e6a728053359..74611b65aaba 100644
--- a/datafusion/functions/src/math/iszero.rs
+++ b/datafusion/functions/src/math/iszero.rs
@@ -18,11 +18,11 @@
 use std::any::Any;
 use std::sync::Arc;
 
-use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
-use arrow::datatypes::DataType;
+use arrow::array::{ArrayRef, AsArray, BooleanArray};
 use arrow::datatypes::DataType::{Boolean, Float32, Float64};
+use arrow::datatypes::{DataType, Float32Type, Float64Type};
 
-use datafusion_common::{exec_err, DataFusionError, Result};
+use datafusion_common::{exec_err, Result};
 use datafusion_expr::ColumnarValue;
 use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
@@ -77,20 +77,14 @@ impl ScalarUDFImpl for IsZeroFunc {
 /// Iszero SQL function
 pub fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args[0].data_type() {
-        Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
-            &args[0],
-            "x",
-            Float64Array,
-            BooleanArray,
-            { |x: f64| { x == 0_f64 } }
+        Float64 => Ok(Arc::new(BooleanArray::from_unary(
+            args[0].as_primitive::<Float64Type>(),
+            |x| x == 0.0,
         )) as ArrayRef),
 
-        Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
-            &args[0],
-            "x",
-            Float32Array,
-            BooleanArray,
-            { |x: f32| { x == 0_f32 } }
+        Float32 => Ok(Arc::new(BooleanArray::from_unary(
+            args[0].as_primitive::<Float32Type>(),
+            |x| x == 0.0,
         )) as ArrayRef),
 
         other => exec_err!("Unsupported data type {other:?} for function iszero"),
diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs
index 029003374acc..6068e7526316 100644
--- a/datafusion/physical-plan/src/joins/nested_loop_join.rs
+++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs
@@ -780,7 +780,7 @@ mod tests {
                 };
                 sort_info.push(sort_expr);
             }
-            exec = exec.with_sort_information(vec![sort_info]);
+            exec = exec.try_with_sort_information(vec![sort_info]).unwrap();
         }
 
         Arc::new(exec)
diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs
index 264f297ffb4c..090d60f0bac3 100644
--- a/datafusion/physical-plan/src/joins/test_utils.rs
+++ b/datafusion/physical-plan/src/joins/test_utils.rs
@@ -289,7 +289,7 @@ macro_rules! join_expr_tests {
                     ScalarValue::$SCALAR(Some(10 as $type)),
                     (Operator::Gt, Operator::Lt),
                 ),
-                // left_col - 1 > right_col + 5 AND left_col + 3 < right_col + 10
+                // left_col - 1 > right_col + 3 AND left_col + 3 < right_col + 15
                 1 => gen_conjunctive_numerical_expr(
                     left_col,
                     right_col,
@@ -300,9 +300,9 @@ macro_rules! join_expr_tests {
                         Operator::Plus,
                     ),
                     ScalarValue::$SCALAR(Some(1 as $type)),
-                    ScalarValue::$SCALAR(Some(5 as $type)),
                     ScalarValue::$SCALAR(Some(3 as $type)),
-                    ScalarValue::$SCALAR(Some(10 as $type)),
+                    ScalarValue::$SCALAR(Some(3 as $type)),
+                    ScalarValue::$SCALAR(Some(15 as $type)),
                     (Operator::Gt, Operator::Lt),
                 ),
                 // left_col - 1 > right_col + 5 AND left_col - 3 < right_col + 10
@@ -353,7 +353,8 @@ macro_rules! join_expr_tests {
                     ScalarValue::$SCALAR(Some(3 as $type)),
                     (Operator::Gt, Operator::Lt),
                 ),
-                // left_col - 2 >= right_col - 5 AND left_col - 7 <= right_col - 3
+                // left_col - 2 >= right_col + 5 AND left_col + 7 <= right_col - 3
+                // (filters all input rows)
                 5 => gen_conjunctive_numerical_expr(
                     left_col,
                     right_col,
@@ -369,7 +370,7 @@ macro_rules! join_expr_tests {
                     ScalarValue::$SCALAR(Some(3 as $type)),
                     (Operator::GtEq, Operator::LtEq),
                 ),
-                // left_col - 28 >= right_col - 11 AND left_col - 21 <= right_col - 39
+                // left_col + 28 >= right_col - 11 AND left_col + 21 <= right_col + 39
                 6 => gen_conjunctive_numerical_expr(
                     left_col,
                     right_col,
@@ -385,7 +386,7 @@ macro_rules! join_expr_tests {
                     ScalarValue::$SCALAR(Some(39 as $type)),
                     (Operator::Gt, Operator::LtEq),
                 ),
-                // left_col - 28 >= right_col - 11 AND left_col - 21 <= right_col + 39
+                // left_col + 28 >= right_col - 11 AND left_col - 21 <= right_col + 39
                 7 => gen_conjunctive_numerical_expr(
                     left_col,
                     right_col,
@@ -526,10 +527,10 @@ pub fn create_memory_table(
 ) -> Result<(Arc<dyn ExecutionPlan>, Arc<dyn ExecutionPlan>)> {
     let left_schema = left_partition[0].schema();
     let left = MemoryExec::try_new(&[left_partition], left_schema, None)?
-        .with_sort_information(left_sorted);
+        .try_with_sort_information(left_sorted)?;
     let right_schema = right_partition[0].schema();
     let right = MemoryExec::try_new(&[right_partition], right_schema, None)?
-        .with_sort_information(right_sorted);
+        .try_with_sort_information(right_sorted)?;
     Ok((Arc::new(left), Arc::new(right)))
 }
 
diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs
index 3aa445d295cb..456f0ef2dcc8 100644
--- a/datafusion/physical-plan/src/memory.rs
+++ b/datafusion/physical-plan/src/memory.rs
@@ -33,6 +33,9 @@ use arrow::record_batch::RecordBatch;
 use datafusion_common::{internal_err, project_schema, Result};
 use datafusion_execution::memory_pool::MemoryReservation;
 use datafusion_execution::TaskContext;
+use datafusion_physical_expr::equivalence::ProjectionMapping;
+use datafusion_physical_expr::expressions::Column;
+use datafusion_physical_expr::utils::collect_columns;
 use datafusion_physical_expr::{EquivalenceProperties, LexOrdering};
 
 use futures::Stream;
@@ -206,16 +209,63 @@ impl MemoryExec {
     /// where both `a ASC` and `b DESC` can describe the table ordering. With
     /// [`EquivalenceProperties`], we can keep track of these equivalences
     /// and treat `a ASC` and `b DESC` as the same ordering requirement.
-    pub fn with_sort_information(mut self, sort_information: Vec<LexOrdering>) -> Self {
-        self.sort_information = sort_information;
+    ///
+    /// Note that if there is an internal projection, that projection will be
+    /// also applied to the given `sort_information`.
+    pub fn try_with_sort_information(
+        mut self,
+        mut sort_information: Vec<LexOrdering>,
+    ) -> Result<Self> {
+        // All sort expressions must refer to the original schema
+        let fields = self.schema.fields();
+        let ambiguous_column = sort_information
+            .iter()
+            .flatten()
+            .flat_map(|expr| collect_columns(&expr.expr))
+            .find(|col| {
+                fields
+                    .get(col.index())
+                    .map(|field| field.name() != col.name())
+                    .unwrap_or(true)
+            });
+        if let Some(col) = ambiguous_column {
+            return internal_err!(
+                "Column {:?} is not found in the original schema of the MemoryExec",
+                col
+            );
+        }
+
+        // If there is a projection on the source, we also need to project orderings
+        if let Some(projection) = &self.projection {
+            let base_eqp = EquivalenceProperties::new_with_orderings(
+                self.original_schema(),
+                &sort_information,
+            );
+            let proj_exprs = projection
+                .iter()
+                .map(|idx| {
+                    let base_schema = self.original_schema();
+                    let name = base_schema.field(*idx).name();
+                    (Arc::new(Column::new(name, *idx)) as _, name.to_string())
+                })
+                .collect::<Vec<_>>();
+            let projection_mapping =
+                ProjectionMapping::try_new(&proj_exprs, &self.original_schema())?;
+            sort_information = base_eqp
+                .project(&projection_mapping, self.schema())
+                .oeq_class
+                .orderings;
+        }
 
+        self.sort_information = sort_information;
         // We need to update equivalence properties when updating sort information.
         let eq_properties = EquivalenceProperties::new_with_orderings(
             self.schema(),
             &self.sort_information,
         );
         self.cache = self.cache.with_eq_properties(eq_properties);
-        self
+
+        Ok(self)
     }
 
     pub fn original_schema(&self) -> SchemaRef {
@@ -347,7 +397,7 @@ mod tests {
 
         let sort_information = vec![sort1.clone(), sort2.clone()];
         let mem_exec = MemoryExec::try_new(&[vec![]], schema, None)?
-            .with_sort_information(sort_information);
+            .try_with_sort_information(sort_information)?;
 
         assert_eq!(
             mem_exec.properties().output_ordering().unwrap(),
diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs
index d9368cf86d45..902d9f4477bc 100644
--- a/datafusion/physical-plan/src/repartition/mod.rs
+++ b/datafusion/physical-plan/src/repartition/mod.rs
@@ -1677,7 +1677,8 @@ mod test {
         Arc::new(
             MemoryExec::try_new(&[vec![]], Arc::clone(schema), None)
                 .unwrap()
-                .with_sort_information(vec![sort_exprs]),
+                .try_with_sort_information(vec![sort_exprs])
+                .unwrap(),
         )
     }
 }
diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs
index 1cf22060b62a..108e42e7be42 100644
--- a/datafusion/physical-plan/src/union.rs
+++ b/datafusion/physical-plan/src/union.rs
@@ -809,11 +809,11 @@ mod tests {
                 .collect::<Vec<_>>();
             let child1 = Arc::new(
                 MemoryExec::try_new(&[], Arc::clone(&schema), None)?
-                    .with_sort_information(first_orderings),
+                    .try_with_sort_information(first_orderings)?,
             );
             let child2 = Arc::new(
                 MemoryExec::try_new(&[], Arc::clone(&schema), None)?
-                    .with_sort_information(second_orderings),
+                    .try_with_sort_information(second_orderings)?,
             );
 
             let mut union_expected_eq = EquivalenceProperties::new(Arc::clone(&schema));
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index c029fe2a23d8..69c7745165f4 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -477,6 +477,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
                 let filter_expr =
                     self.sql_to_expr(predicate_expr, plan.schema(), planner_context)?;
+
+                // Check for aggregation functions
+                let aggregate_exprs =
+                    find_aggregate_exprs(std::slice::from_ref(&filter_expr));
+                if !aggregate_exprs.is_empty() {
+                    return plan_err!(
+                        "Aggregate functions are not allowed in the WHERE clause. Consider using HAVING instead"
+                    );
+                }
+
                 let mut using_columns = HashSet::new();
                 expr_to_columns(&filter_expr, &mut using_columns)?;
                 let filter_expr = normalize_col_with_schemas_and_ambiguity_check(
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index 3d5c37372aca..f03c3700ab9f 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -6076,3 +6076,13 @@ ORDER BY k;
 ----
 1 1.8125 6.8007813 Float16 Float16
 2 8.5 8.5 Float16 Float16
+
+statement ok
+CREATE TABLE t1(v1 int);
+
+# issue: https://github.com/apache/datafusion/issues/12814 
+statement error DataFusion error: Error during planning: Aggregate functions are not allowed in the WHERE clause. Consider using HAVING instead
+SELECT v1 FROM t1 WHERE ((count(v1) % 1) << 1) > 0;
+
+statement ok
+DROP TABLE t1;
diff --git a/docs/source/index.rst b/docs/source/index.rst
index f11670d259bf..27dd58cf50f4 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -36,9 +36,11 @@ Apache DataFusion
 DataFusion is an extensible query engine written in `Rust <http://rustlang.org>`_ that
 uses `Apache Arrow <https://arrow.apache.org>`_ as its in-memory format.
 
-This documentation is for the `core DataFusion project <https://github.com/apache/datafusion>`_, which contains
-libraries that are used to build data-centric system software. DataFusion also offers the following subprojects, which
-provide packaged versions of DataFusion intended for end users, and these have separate documentation.
+The documentation on this site is for the `core DataFusion project <https://github.com/apache/datafusion>`_, which contains
+libraries and binaries for developers building fast and feature rich database and analytic systems,
+customized to particular workloads. See `use cases <https://datafusion.apache.org/user-guide/introduction.html#use-cases>`_ for examples.
+
+The following related subprojects target end users and have separate documentation.
 
 - `DataFusion Python <https://datafusion.apache.org/python/>`_ offers a Python interface for SQL and DataFrame
   queries.
@@ -47,10 +49,6 @@ provide packaged versions of DataFusion intended for end users, and these have s
 - `DataFusion Comet <https://datafusion.apache.org/comet/>`_ is an accelerator for Apache Spark based on
   DataFusion.
 
-DataFusion's target users are
-developers building fast and feature rich database and analytic systems,
-customized to particular workloads. See `use cases <https://datafusion.apache.org/user-guide/introduction.html#use-cases>`_ for examples.
-
 "Out of the box," DataFusion offers `SQL <https://datafusion.apache.org/user-guide/sql/index.html>`_
 and `Dataframe <https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html>`_ APIs,
 excellent `performance <https://benchmark.clickhouse.com/>`_, built-in support for CSV, Parquet, JSON, and Avro,
diff --git a/docs/source/user-guide/sql/aggregate_functions.md b/docs/source/user-guide/sql/aggregate_functions.md
index 1c25874c0806..4f774fe6d0f0 100644
--- a/docs/source/user-guide/sql/aggregate_functions.md
+++ b/docs/source/user-guide/sql/aggregate_functions.md
@@ -27,183 +27,9 @@ the rest of the documentation.
 
 [automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740
 
-## General
-
-- [avg](#avg)
-- [bool_and](#bool_and)
-- [bool_or](#bool_or)
-- [count](#count)
-- [max](#max)
-- [mean](#mean)
-- [median](#median)
-- [min](#min)
-- [sum](#sum)
-- [array_agg](#array_agg)
-- [first_value](#first_value)
-- [last_value](#last_value)
-
-### `avg`
-
-Returns the average of numeric values in the specified column.
-
-```
-avg(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-#### Aliases
-
-- `mean`
-
-### `bool_and`
-
-Returns true if all non-null input values are true, otherwise false.
-
-```
-bool_and(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `bool_or`
-
-Returns true if any non-null input value is true, otherwise false.
-
-```
-bool_or(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `count`
-
-Returns the number of non-null values in the specified column.
-
-To include _null_ values in the total count, use `count(*)`.
-
-```
-count(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `max`
-
-Returns the maximum value in the specified column.
-
-```
-max(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `mean`
-
-_Alias of [avg](#avg)._
-
-### `median`
-
-Returns the median value in the specified column.
-
-```
-median(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `min`
-
-Returns the minimum value in the specified column.
-
-```
-min(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `sum`
-
-Returns the sum of all values in the specified column.
-
-```
-sum(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `array_agg`
-
-Returns an array created from the expression elements. If ordering requirement is given, elements are inserted in the order of required ordering.
-
-```
-array_agg(expression [ORDER BY expression])
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `first_value`
-
-Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group.
-
-```
-first_value(expression [ORDER BY expression])
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `last_value`
-
-Returns the last element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group.
-
-```
-last_value(expression [ORDER BY expression])
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
 ## Statistical
 
-- [corr](#corr)
 - [covar](#covar)
-- [covar_pop](#covar_pop)
-- [covar_samp](#covar_samp)
-- [stddev](#stddev)
-- [stddev_pop](#stddev_pop)
-- [stddev_samp](#stddev_samp)
 - [regr_avgx](#regr_avgx)
 - [regr_avgy](#regr_avgy)
 - [regr_count](#regr_count)
@@ -214,21 +40,6 @@ last_value(expression [ORDER BY expression])
 - [regr_syy](#regr_syy)
 - [regr_sxy](#regr_sxy)
 
-### `corr`
-
-Returns the coefficient of correlation between two numeric values.
-
-```
-corr(expression1, expression2)
-```
-
-#### Arguments
-
-- **expression1**: First expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **expression2**: Second expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
 ### `covar`
 
 Returns the covariance of a set of number pairs.
@@ -244,75 +55,6 @@ covar(expression1, expression2)
 - **expression2**: Second expression to operate on.
   Can be a constant, column, or function, and any combination of arithmetic operators.
 
-### `covar_pop`
-
-Returns the population covariance of a set of number pairs.
-
-```
-covar_pop(expression1, expression2)
-```
-
-#### Arguments
-
-- **expression1**: First expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **expression2**: Second expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `covar_samp`
-
-Returns the sample covariance of a set of number pairs.
-
-```
-covar_samp(expression1, expression2)
-```
-
-#### Arguments
-
-- **expression1**: First expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **expression2**: Second expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `stddev`
-
-Returns the standard deviation of a set of numbers.
-
-```
-stddev(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `stddev_pop`
-
-Returns the population standard deviation of a set of numbers.
-
-```
-stddev_pop(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `stddev_samp`
-
-Returns the sample standard deviation of a set of numbers.
-
-```
-stddev_samp(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
 ### `regr_slope`
 
 Returns the slope of the linear regression line for non-null pairs in aggregate columns.
@@ -448,74 +190,3 @@ regr_sxy(expression_y, expression_x)
   Can be a constant, column, or function, and any combination of arithmetic operators.
 - **expression_x**: Independent variable.
   Can be a constant, column, or function, and any combination of arithmetic operators.
-
-## Approximate
-
-- [approx_distinct](#approx_distinct)
-- [approx_median](#approx_median)
-- [approx_percentile_cont](#approx_percentile_cont)
-- [approx_percentile_cont_with_weight](#approx_percentile_cont_with_weight)
-
-### `approx_distinct`
-
-Returns the approximate number of distinct input values calculated using the
-HyperLogLog algorithm.
-
-```
-approx_distinct(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `approx_median`
-
-Returns the approximate median (50th percentile) of input values.
-It is an alias of `approx_percentile_cont(x, 0.5)`.
-
-```
-approx_median(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `approx_percentile_cont`
-
-Returns the approximate percentile of input values using the t-digest algorithm.
-
-```
-approx_percentile_cont(expression, percentile, centroids)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive).
-- **centroids**: Number of centroids to use in the t-digest algorithm. _Default is 100_.
-
-  If there are this number or fewer unique values, you can expect an exact result.
-  A higher number of centroids results in a more accurate approximation, but
-  requires more memory to compute.
-
-### `approx_percentile_cont_with_weight`
-
-Returns the weighted approximate percentile of input values using the
-t-digest algorithm.
-
-```
-approx_percentile_cont_with_weight(expression, weight, percentile)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **weight**: Expression to use as weight.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive).
diff --git a/docs/source/user-guide/sql/aggregate_functions_new.md b/docs/source/user-guide/sql/aggregate_functions_new.md
index 08cdb0a9867c..fc918c3b15ea 100644
--- a/docs/source/user-guide/sql/aggregate_functions_new.md
+++ b/docs/source/user-guide/sql/aggregate_functions_new.md
@@ -37,15 +37,80 @@ Aggregate functions operate on a set of values to compute a single result.
 
 ## General Functions
 
+- [array_agg](#array_agg)
+- [avg](#avg)
 - [bit_and](#bit_and)
 - [bit_or](#bit_or)
 - [bit_xor](#bit_xor)
+- [bool_and](#bool_and)
+- [bool_or](#bool_or)
+- [count](#count)
+- [first_value](#first_value)
+- [grouping](#grouping)
+- [last_value](#last_value)
+- [max](#max)
+- [mean](#mean)
+- [median](#median)
+- [min](#min)
+- [string_agg](#string_agg)
+- [sum](#sum)
 - [var](#var)
 - [var_pop](#var_pop)
 - [var_population](#var_population)
 - [var_samp](#var_samp)
 - [var_sample](#var_sample)
 
+### `array_agg`
+
+Returns an array created from the expression elements. If ordering is required, elements are inserted in the specified order.
+
+```
+array_agg(expression [ORDER BY expression])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT array_agg(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| array_agg(column_name ORDER BY other_column)  |
++-----------------------------------------------+
+| [element1, element2, element3]                |
++-----------------------------------------------+
+```
+
+### `avg`
+
+Returns the average of numeric values in the specified column.
+
+```
+avg(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **Aliases: **: `mean`
+
+#### Example
+
+```sql
+> SELECT avg(column_name) FROM table_name;
++---------------------------+
+| avg(column_name)           |
++---------------------------+
+| 42.75                      |
++---------------------------+
+```
+
+#### Aliases
+
+- mean
+
 ### `bit_and`
 
 Computes the bitwise AND of all non-null input values.
@@ -82,6 +147,276 @@ bit_xor(expression)
 
 - **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators.
 
+### `bool_and`
+
+Returns true if all non-null input values are true, otherwise false.
+
+```
+bool_and(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT bool_and(column_name) FROM table_name;
++----------------------------+
+| bool_and(column_name)       |
++----------------------------+
+| true                        |
++----------------------------+
+```
+
+### `bool_or`
+
+Returns true if all non-null input values are true, otherwise false.
+
+```
+bool_and(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT bool_and(column_name) FROM table_name;
++----------------------------+
+| bool_and(column_name)       |
++----------------------------+
+| true                        |
++----------------------------+
+```
+
+### `count`
+
+Returns the number of non-null values in the specified column. To include null values in the total count, use `count(*)`.
+
+```
+count(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT count(column_name) FROM table_name;
++-----------------------+
+| count(column_name)     |
++-----------------------+
+| 100                   |
++-----------------------+
+
+> SELECT count(*) FROM table_name;
++------------------+
+| count(*)         |
++------------------+
+| 120              |
++------------------+
+```
+
+### `first_value`
+
+Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group.
+
+```
+first_value(expression [ORDER BY expression])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT first_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| first_value(column_name ORDER BY other_column)|
++-----------------------------------------------+
+| first_element                                 |
++-----------------------------------------------+
+```
+
+### `grouping`
+
+Returns 1 if the data is aggregated across the specified column, or 0 if it is not aggregated in the result set.
+
+```
+grouping(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to evaluate whether data is aggregated across the specified column. Can be a constant, column, or function.
+
+#### Example
+
+```sql
+> SELECT column_name, GROUPING(column_name) AS group_column
+  FROM table_name
+  GROUP BY GROUPING SETS ((column_name), ());
++-------------+-------------+
+| column_name | group_column |
++-------------+-------------+
+| value1      | 0           |
+| value2      | 0           |
+| NULL        | 1           |
++-------------+-------------+
+```
+
+### `last_value`
+
+Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group.
+
+```
+first_value(expression [ORDER BY expression])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT first_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| first_value(column_name ORDER BY other_column)|
++-----------------------------------------------+
+| first_element                                 |
++-----------------------------------------------+
+```
+
+### `max`
+
+Returns the maximum value in the specified column.
+
+```
+max(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT max(column_name) FROM table_name;
++----------------------+
+| max(column_name)      |
++----------------------+
+| 150                  |
++----------------------+
+```
+
+### `mean`
+
+_Alias of [avg](#avg)._
+
+### `median`
+
+Returns the median value in the specified column.
+
+```
+median(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT median(column_name) FROM table_name;
++----------------------+
+| median(column_name)   |
++----------------------+
+| 45.5                 |
++----------------------+
+```
+
+### `min`
+
+Returns the maximum value in the specified column.
+
+```
+max(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT max(column_name) FROM table_name;
++----------------------+
+| max(column_name)      |
++----------------------+
+| 150                  |
++----------------------+
+```
+
+### `string_agg`
+
+Concatenates the values of string expressions and places separator values between them.
+
+```
+string_agg(expression, delimiter)
+```
+
+#### Arguments
+
+- **expression**: The string expression to concatenate. Can be a column or any valid string expression.
+- **delimiter**: A literal string used as a separator between the concatenated values.
+
+#### Example
+
+```sql
+> SELECT string_agg(name, ', ') AS names_list
+  FROM employee;
++--------------------------+
+| names_list               |
++--------------------------+
+| Alice, Bob, Charlie      |
++--------------------------+
+```
+
+### `sum`
+
+Returns the sum of all values in the specified column.
+
+```
+sum(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT sum(column_name) FROM table_name;
++-----------------------+
+| sum(column_name)       |
++-----------------------+
+| 12345                 |
++-----------------------+
+```
+
 ### `var`
 
 Returns the statistical sample variance of a set of numbers.
@@ -126,3 +461,280 @@ _Alias of [var](#var)._
 ### `var_sample`
 
 _Alias of [var](#var)._
+
+## Statistical Functions
+
+- [corr](#corr)
+- [covar](#covar)
+- [covar_pop](#covar_pop)
+- [covar_samp](#covar_samp)
+- [nth_value](#nth_value)
+- [stddev](#stddev)
+- [stddev_pop](#stddev_pop)
+- [stddev_samp](#stddev_samp)
+
+### `corr`
+
+Returns the coefficient of correlation between two numeric values.
+
+```
+corr(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT corr(column1, column2) FROM table_name;
++--------------------------------+
+| corr(column1, column2)         |
++--------------------------------+
+| 0.85                           |
++--------------------------------+
+```
+
+### `covar`
+
+_Alias of [covar_samp](#covar_samp)._
+
+### `covar_pop`
+
+Returns the sample covariance of a set of number pairs.
+
+```
+covar_samp(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT covar_samp(column1, column2) FROM table_name;
++-----------------------------------+
+| covar_samp(column1, column2)      |
++-----------------------------------+
+| 8.25                              |
++-----------------------------------+
+```
+
+### `covar_samp`
+
+Returns the sample covariance of a set of number pairs.
+
+```
+covar_samp(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT covar_samp(column1, column2) FROM table_name;
++-----------------------------------+
+| covar_samp(column1, column2)      |
++-----------------------------------+
+| 8.25                              |
++-----------------------------------+
+```
+
+#### Aliases
+
+- covar
+
+### `nth_value`
+
+Returns the nth value in a group of values.
+
+```
+nth_value(expression, n ORDER BY expression)
+```
+
+#### Arguments
+
+- **expression**: The column or expression to retrieve the nth value from. expression to operate on. Can be a constant, column, or function, and any combination of operators.
+- **n**: The position (nth) of the value to retrieve, based on the ordering.
+
+#### Example
+
+```sql
+> SELECT dept_id, salary, NTH_VALUE(salary, 2) OVER (PARTITION BY dept_id ORDER BY salary ASC) AS second_salary_by_dept
+  FROM employee;
++---------+--------+-------------------------+
+| dept_id | salary | second_salary_by_dept   |
++---------+--------+-------------------------+
+| 1       | 30000  | NULL                    |
+| 1       | 40000  | 40000                   |
+| 1       | 50000  | 40000                   |
+| 2       | 35000  | NULL                    |
+| 2       | 45000  | 45000                   |
++---------+--------+-------------------------+
+```
+
+### `stddev`
+
+Returns the standard deviation of a set of numbers.
+
+```
+stddev(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT stddev(column_name) FROM table_name;
++----------------------+
+| stddev(column_name)   |
++----------------------+
+| 12.34                |
++----------------------+
+```
+
+#### Aliases
+
+- stddev_samp
+
+### `stddev_pop`
+
+Returns the standard deviation of a set of numbers.
+
+```
+stddev(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT stddev(column_name) FROM table_name;
++----------------------+
+| stddev(column_name)   |
++----------------------+
+| 12.34                |
++----------------------+
+```
+
+### `stddev_samp`
+
+_Alias of [stddev](#stddev)._
+
+## Approximate Functions
+
+- [approx_distinct](#approx_distinct)
+- [approx_median](#approx_median)
+- [approx_percentile_cont](#approx_percentile_cont)
+- [approx_percentile_cont_with_weight](#approx_percentile_cont_with_weight)
+
+### `approx_distinct`
+
+Returns the approximate number of distinct input values calculated using the HyperLogLog algorithm.
+
+```
+approx_distinct(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT approx_distinct(column_name) FROM table_name;
++-----------------------------------+
+| approx_distinct(column_name)      |
++-----------------------------------+
+| 42                                |
++-----------------------------------+
+```
+
+### `approx_median`
+
+Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(x, 0.5)`.
+
+```
+approx_median(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT approx_median(column_name) FROM table_name;
++-----------------------------------+
+| approx_median(column_name)        |
++-----------------------------------+
+| 23.5                              |
++-----------------------------------+
+```
+
+### `approx_percentile_cont`
+
+Returns the approximate percentile of input values using the t-digest algorithm.
+
+```
+approx_percentile_cont(expression, percentile, centroids)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive).
+- **centroids**: Number of centroids to use in the t-digest algorithm. _Default is 100_. A higher number results in more accurate approximation but requires more memory.
+
+#### Example
+
+```sql
+> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
++-------------------------------------------------+
+| approx_percentile_cont(column_name, 0.75, 100)  |
++-------------------------------------------------+
+| 65.0                                            |
++-------------------------------------------------+
+```
+
+### `approx_percentile_cont_with_weight`
+
+Returns the weighted approximate percentile of input values using the t-digest algorithm.
+
+```
+approx_percentile_cont_with_weight(expression, weight, percentile)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **weight**: Expression to use as weight. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive).
+
+#### Example
+
+```sql
+> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
++----------------------------------------------------------------------+
+| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
++----------------------------------------------------------------------+
+| 78.5                                                                 |
++----------------------------------------------------------------------+
+```
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index 480767389086..95762d958521 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -557,62 +557,7 @@ trunc(numeric_expression[, decimal_places])
 
 ## Conditional Functions
 
-- [nullif](#nullif)
-- [nvl](#nvl)
-- [nvl2](#nvl2)
-- [ifnull](#ifnull)
-
-### `nullif`
-
-Returns _null_ if _expression1_ equals _expression2_; otherwise it returns _expression1_.
-This can be used to perform the inverse operation of [`coalesce`](#coalesce).
-
-```
-nullif(expression1, expression2)
-```
-
-#### Arguments
-
-- **expression1**: Expression to compare and return if equal to expression2.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **expression2**: Expression to compare to expression1.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `nvl`
-
-Returns _expression2_ if _expression1_ is NULL; otherwise it returns _expression1_.
-
-```
-nvl(expression1, expression2)
-```
-
-#### Arguments
-
-- **expression1**: return if expression1 not is NULL.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **expression2**: return if expression1 is NULL.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `nvl2`
-
-Returns _expression2_ if _expression1_ is not NULL; otherwise it returns _expression3_.
-
-```
-nvl2(expression1, expression2, expression3)
-```
-
-#### Arguments
-
-- **expression1**: conditional expression.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **expression2**: return if expression1 is not NULL.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-- **expression3**: return if expression1 is NULL.
-  Can be a constant, column, or function, and any combination of arithmetic operators.
-
-### `ifnull`
-
-_Alias of [nvl](#nvl)._
+See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html)
 
 ## String Functions
 
@@ -2806,93 +2751,9 @@ are not allowed
 
 ## Struct Functions
 
-- [struct](#struct)
-- [named_struct](#named_struct)
 - [unnest](#unnest-struct)
 
-### `struct`
-
-Returns an Arrow struct using the specified input expressions optionally named.
-Fields in the returned struct use the optional name or the `cN` naming convention.
-For example: `c0`, `c1`, `c2`, etc.
-
-```
-struct(expression1[, ..., expression_n])
-```
-
-For example, this query converts two columns `a` and `b` to a single column with
-a struct type of fields `field_a` and `c1`:
-
-```
-select * from t;
-+---+---+
-| a | b |
-+---+---+
-| 1 | 2 |
-| 3 | 4 |
-+---+---+
-
--- use default names `c0`, `c1`
-> select struct(a, b) from t;
-+-----------------+
-| struct(t.a,t.b) |
-+-----------------+
-| {c0: 1, c1: 2}  |
-| {c0: 3, c1: 4}  |
-+-----------------+
-
--- name the first field `field_a`
-select struct(a as field_a, b) from t;
-+--------------------------------------------------+
-| named_struct(Utf8("field_a"),t.a,Utf8("c1"),t.b) |
-+--------------------------------------------------+
-| {field_a: 1, c1: 2}                              |
-| {field_a: 3, c1: 4}                              |
-+--------------------------------------------------+
-```
-
-#### Arguments
-
-- **expression_n**: Expression to include in the output struct.
-  Can be a constant, column, or function, any combination of arithmetic or
-  string operators, or a named expression of previous listed .
-
-### `named_struct`
-
-Returns an Arrow struct using the specified name and input expressions pairs.
-
-```
-named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input])
-```
-
-For example, this query converts two columns `a` and `b` to a single column with
-a struct type of fields `field_a` and `field_b`:
-
-```
-select * from t;
-+---+---+
-| a | b |
-+---+---+
-| 1 | 2 |
-| 3 | 4 |
-+---+---+
-
-select named_struct('field_a', a, 'field_b', b) from t;
-+-------------------------------------------------------+
-| named_struct(Utf8("field_a"),t.a,Utf8("field_b"),t.b) |
-+-------------------------------------------------------+
-| {field_a: 1, field_b: 2}                              |
-| {field_a: 3, field_b: 4}                              |
-+-------------------------------------------------------+
-```
-
-#### Arguments
-
-- **expression_n_name**: Name of the column field.
-  Must be a constant string.
-- **expression_n_input**: Expression to include in the output struct.
-  Can be a constant, column, or function, and any combination of arithmetic or
-  string operators.
+For more struct functions see the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html)
 
 ### `unnest (struct)`
 
@@ -3068,83 +2929,4 @@ select map_values(map([100, 5], [42,43]));
 
 ## Other Functions
 
-- [arrow_cast](#arrow_cast)
-- [arrow_typeof](#arrow_typeof)
-- [version](#version)
-
-### `arrow_cast`
-
-Casts a value to a specific Arrow data type:
-
-```
-arrow_cast(expression, datatype)
-```
-
-#### Arguments
-
-- **expression**: Expression to cast.
-  Can be a constant, column, or function, and any combination of arithmetic or
-  string operators.
-- **datatype**: [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) name
-  to cast to, as a string. The format is the same as that returned by [`arrow_typeof`]
-
-#### Example
-
-```
-> select arrow_cast(-5, 'Int8') as a,
-  arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b,
-  arrow_cast('bar', 'LargeUtf8') as c,
-  arrow_cast('2023-01-02T12:53:02', 'Timestamp(Microsecond, Some("+08:00"))') as d
-  ;
-+----+-----+-----+---------------------------+
-| a  | b   | c   | d                         |
-+----+-----+-----+---------------------------+
-| -5 | foo | bar | 2023-01-02T12:53:02+08:00 |
-+----+-----+-----+---------------------------+
-1 row in set. Query took 0.001 seconds.
-```
-
-### `arrow_typeof`
-
-Returns the name of the underlying [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) of the expression:
-
-```
-arrow_typeof(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to evaluate.
-  Can be a constant, column, or function, and any combination of arithmetic or
-  string operators.
-
-#### Example
-
-```
-> select arrow_typeof('foo'), arrow_typeof(1);
-+---------------------------+------------------------+
-| arrow_typeof(Utf8("foo")) | arrow_typeof(Int64(1)) |
-+---------------------------+------------------------+
-| Utf8                      | Int64                  |
-+---------------------------+------------------------+
-1 row in set. Query took 0.001 seconds.
-```
-
-### `version`
-
-Returns the version of DataFusion.
-
-```
-version()
-```
-
-#### Example
-
-```
-> select version();
-+--------------------------------------------+
-| version()                                  |
-+--------------------------------------------+
-| Apache DataFusion 41.0.0, aarch64 on macos |
-+--------------------------------------------+
-```
+See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html)
diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md
index 673c55f46b15..96fbcaa1104b 100644
--- a/docs/source/user-guide/sql/scalar_functions_new.md
+++ b/docs/source/user-guide/sql/scalar_functions_new.md
@@ -54,6 +54,10 @@ log(numeric_expression)
 ## Conditional Functions
 
 - [coalesce](#coalesce)
+- [ifnull](#ifnull)
+- [nullif](#nullif)
+- [nvl](#nvl)
+- [nvl2](#nvl2)
 
 ### `coalesce`
 
@@ -67,6 +71,117 @@ coalesce(expression1[, ..., expression_n])
 
 - **expression1, expression_n**: Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary.
 
+#### Example
+
+```sql
+> select coalesce(null, null, 'datafusion');
++----------------------------------------+
+| coalesce(NULL,NULL,Utf8("datafusion")) |
++----------------------------------------+
+| datafusion                             |
++----------------------------------------+
+```
+
+### `ifnull`
+
+_Alias of [nvl](#nvl)._
+
+### `nullif`
+
+Returns _null_ if _expression1_ equals _expression2_; otherwise it returns _expression1_.
+This can be used to perform the inverse operation of [`coalesce`](#coalesce).
+
+```
+nullif(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: Expression to compare and return if equal to expression2. Can be a constant, column, or function, and any combination of operators.
+- **expression2**: Expression to compare to expression1. Can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select nullif('datafusion', 'data');
++-----------------------------------------+
+| nullif(Utf8("datafusion"),Utf8("data")) |
++-----------------------------------------+
+| datafusion                              |
++-----------------------------------------+
+> select nullif('datafusion', 'datafusion');
++-----------------------------------------------+
+| nullif(Utf8("datafusion"),Utf8("datafusion")) |
++-----------------------------------------------+
+|                                               |
++-----------------------------------------------+
+```
+
+### `nvl`
+
+Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_.
+
+```
+nvl(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: Expression to return if not null. Can be a constant, column, or function, and any combination of operators.
+- **expression2**: Expression to return if expr1 is null. Can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select nvl(null, 'a');
++---------------------+
+| nvl(NULL,Utf8("a")) |
++---------------------+
+| a                   |
++---------------------+\
+> select nvl('b', 'a');
++--------------------------+
+| nvl(Utf8("b"),Utf8("a")) |
++--------------------------+
+| b                        |
++--------------------------+
+```
+
+#### Aliases
+
+- ifnull
+
+### `nvl2`
+
+Returns _expression2_ if _expression1_ is not NULL; otherwise it returns _expression3_.
+
+```
+nvl2(expression1, expression2, expression3)
+```
+
+#### Arguments
+
+- **expression1**: Expression to test for null. Can be a constant, column, or function, and any combination of operators.
+- **expression2**: Expression to return if expr1 is not null. Can be a constant, column, or function, and any combination of operators.
+- **expression3**: Expression to return if expr1 is null. Can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select nvl2(null, 'a', 'b');
++--------------------------------+
+| nvl2(NULL,Utf8("a"),Utf8("b")) |
++--------------------------------+
+| b                              |
++--------------------------------+
+> select nvl2('data', 'a', 'b');
++----------------------------------------+
+| nvl2(Utf8("data"),Utf8("a"),Utf8("b")) |
++----------------------------------------+
+| a                                      |
++----------------------------------------+
+```
+
 ## String Functions
 
 - [ascii](#ascii)
@@ -1159,6 +1274,102 @@ to_date('2017-05-31', '%Y-%m-%d')
 
 Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)
 
+## Struct Functions
+
+- [named_struct](#named_struct)
+- [row](#row)
+- [struct](#struct)
+
+### `named_struct`
+
+Returns an Arrow struct using the specified name and input expressions pairs.
+
+```
+named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input])
+```
+
+#### Arguments
+
+- **expression_n_name**: Name of the column field. Must be a constant string.
+- **expression_n_input**: Expression to include in the output struct. Can be a constant, column, or function, and any combination of arithmetic or string operators.
+
+#### Example
+
+For example, this query converts two columns `a` and `b` to a single column with
+a struct type of fields `field_a` and `field_b`:
+
+```sql
+> select * from t;
++---+---+
+| a | b |
++---+---+
+| 1 | 2 |
+| 3 | 4 |
++---+---+
+> select named_struct('field_a', a, 'field_b', b) from t;
++-------------------------------------------------------+
+| named_struct(Utf8("field_a"),t.a,Utf8("field_b"),t.b) |
++-------------------------------------------------------+
+| {field_a: 1, field_b: 2}                              |
+| {field_a: 3, field_b: 4}                              |
++-------------------------------------------------------+
+```
+
+### `row`
+
+_Alias of [struct](#struct)._
+
+### `struct`
+
+Returns an Arrow struct using the specified input expressions optionally named.
+Fields in the returned struct use the optional name or the `cN` naming convention.
+For example: `c0`, `c1`, `c2`, etc.
+
+```
+struct(expression1[, ..., expression_n])
+```
+
+#### Arguments
+
+- **expression1, expression_n**: Expression to include in the output struct. Can be a constant, column, or function, any combination of arithmetic or string operators.
+
+#### Example
+
+For example, this query converts two columns `a` and `b` to a single column with
+a struct type of fields `field_a` and `c1`:
+
+```sql
+> select * from t;
++---+---+
+| a | b |
++---+---+
+| 1 | 2 |
+| 3 | 4 |
++---+---+
+
+-- use default names `c0`, `c1`
+> select struct(a, b) from t;
++-----------------+
+| struct(t.a,t.b) |
++-----------------+
+| {c0: 1, c1: 2}  |
+| {c0: 3, c1: 4}  |
++-----------------+
+
+-- name the first field `field_a`
+select struct(a as field_a, b) from t;
++--------------------------------------------------+
+| named_struct(Utf8("field_a"),t.a,Utf8("c1"),t.b) |
++--------------------------------------------------+
+| {field_a: 1, c1: 2}                              |
+| {field_a: 3, c1: 4}                              |
++--------------------------------------------------+
+```
+
+#### Aliases
+
+- row
+
 ## Hashing Functions
 
 - [digest](#digest)
@@ -1314,3 +1525,123 @@ sha512(expression)
 | <sha512_hash_result>                      |
 +-------------------------------------------+
 ```
+
+## Other Functions
+
+- [arrow_cast](#arrow_cast)
+- [arrow_typeof](#arrow_typeof)
+- [get_field](#get_field)
+- [version](#version)
+
+### `arrow_cast`
+
+Casts a value to a specific Arrow data type.
+
+```
+arrow_cast(expression, datatype)
+```
+
+#### Arguments
+
+- **expression**: Expression to cast. The expression can be a constant, column, or function, and any combination of operators.
+- **datatype**: [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) name to cast to, as a string. The format is the same as that returned by [`arrow_typeof`]
+
+#### Example
+
+```sql
+> select arrow_cast(-5, 'Int8') as a,
+  arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b,
+  arrow_cast('bar', 'LargeUtf8') as c,
+  arrow_cast('2023-01-02T12:53:02', 'Timestamp(Microsecond, Some("+08:00"))') as d
+  ;
++----+-----+-----+---------------------------+
+| a  | b   | c   | d                         |
++----+-----+-----+---------------------------+
+| -5 | foo | bar | 2023-01-02T12:53:02+08:00 |
++----+-----+-----+---------------------------+
+```
+
+### `arrow_typeof`
+
+Returns the name of the underlying [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) of the expression.
+
+```
+arrow_typeof(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to evaluate. The expression can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select arrow_typeof('foo'), arrow_typeof(1);
++---------------------------+------------------------+
+| arrow_typeof(Utf8("foo")) | arrow_typeof(Int64(1)) |
++---------------------------+------------------------+
+| Utf8                      | Int64                  |
++---------------------------+------------------------+
+```
+
+### `get_field`
+
+Returns a field within a map or a struct with the given key.
+Note: most users invoke `get_field` indirectly via field access
+syntax such as `my_struct_col['field_name']` which results in a call to
+`get_field(my_struct_col, 'field_name')`.
+
+```
+get_field(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: The map or struct to retrieve a field for.
+- **expression2**: The field name in the map or struct to retrieve data for. Must evaluate to a string.
+
+#### Example
+
+```sql
+> create table t (idx varchar, v varchar) as values ('data','fusion'), ('apache', 'arrow');
+> select struct(idx, v) from t as c;
++-------------------------+
+| struct(c.idx,c.v)       |
++-------------------------+
+| {c0: data, c1: fusion}  |
+| {c0: apache, c1: arrow} |
++-------------------------+
+> select get_field((select struct(idx, v) from t), 'c0');
++-----------------------+
+| struct(t.idx,t.v)[c0] |
++-----------------------+
+| data                  |
+| apache                |
++-----------------------+
+> select get_field((select struct(idx, v) from t), 'c1');
++-----------------------+
+| struct(t.idx,t.v)[c1] |
++-----------------------+
+| fusion                |
+| arrow                 |
++-----------------------+
+```
+
+### `version`
+
+Returns the version of DataFusion.
+
+```
+version()
+```
+
+#### Example
+
+```sql
+> select version();
++--------------------------------------------+
+| version()                                  |
++--------------------------------------------+
+| Apache DataFusion 42.0.0, aarch64 on macos |
++--------------------------------------------+
+```