diff --git a/crates/polars-sql/src/functions.rs b/crates/polars-sql/src/functions.rs index 2712a44a2e27..c3dbf79773b0 100644 --- a/crates/polars-sql/src/functions.rs +++ b/crates/polars-sql/src/functions.rs @@ -379,9 +379,9 @@ pub(crate) enum PolarsSQLFunctions { LTrim, /// SQL 'normalize' function /// Convert string to Unicode normalization form - /// (one of "NFC", "NFKC", "NFD", or "NFKD"). + /// (one of NFC, NFKC, NFD, or NFKD - unquoted). /// ```sql - /// SELECT NORMALIZE(column_1, 'NFC') FROM df; + /// SELECT NORMALIZE(column_1, NFC) FROM df; /// ``` Normalize, /// SQL 'octet_length' function diff --git a/py-polars/docs/source/reference/sql/functions/string.rst b/py-polars/docs/source/reference/sql/functions/string.rst index 555e696f8dc6..3552826c096b 100644 --- a/py-polars/docs/source/reference/sql/functions/string.rst +++ b/py-polars/docs/source/reference/sql/functions/string.rst @@ -27,6 +27,8 @@ String - Returns a lowercased column. * - :ref:`LTRIM ` - Strips whitespaces from the left. + * - :ref:`NORMALIZE <normalize>` + - Convert string to the specified Unicode normalization form (one of NFC, NFD, NFKC, NFKD). * - :ref:`OCTET_LENGTH <octet_length>` - Returns the length of a given string in bytes. * - :ref:`REGEXP_LIKE ` @@ -366,6 +368,39 @@ Strips whitespaces from the left. # │ DD ┆ DD │ # └───────┴─────────┘ +.. _normalize: + +NORMALIZE +--------- +Convert string to the specified Unicode normalization form (one of NFC, NFD, NFKC, NFKD). +If the normalization form is not provided, NFC is used by default. + +**Example:** + +.. code-block:: python + + df = pl.DataFrame({ + "txt": [ + "Test", + "Ⓣⓔⓢⓣ", + "𝕿𝖊𝖘𝖙", + "𝕋𝕖𝕤𝕥", + "𝗧𝗲𝘀𝘁", + ], + }) + df.sql(""" + SELECT NORMALIZE(txt, NFKC) FROM self + """).to_series() + # shape: (5,) + # Series: 'txt' [str] + # [ + # "Test" + # "Test" + # "Test" + # "Test" + # "Test" + # ] + .. _octet_length: OCTET_LENGTH