From e4fa5fc820f16c2b86c428475d1d92b05c6c65d0 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Tue, 12 Mar 2024 17:49:07 +0100 Subject: [PATCH] Support markdown formatted codeblocks --- charabia/src/separators.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charabia/src/separators.rs b/charabia/src/separators.rs index 5405be86..3d4a8498 100644 --- a/charabia/src/separators.rs +++ b/charabia/src/separators.rs @@ -12,6 +12,7 @@ /// - Zp Paragraph Separator /// - Zs Space Separator /// plus ". ", ", " and ។ល។" (៘ decomposition) to categorize them as hard separators +/// and "`" to understand markdown formatted text #[rustfmt::skip] pub const DEFAULT_SEPARATORS: &[&str] = &[ ". ", ", ", "_", "‿", "⁀", "⁔", "︳", "︴", "﹍", "﹎", "﹏", "_", "-", "֊", "־", "᐀", "᠆", "‐", "‒", "–", @@ -58,7 +59,7 @@ pub const DEFAULT_SEPARATORS: &[&str] = &[ "𑪠", "𑪡", "𑪢", "𑱁", "𑱂", "𑱃", "𑱄", "𑱅", "𑱰", "𑱱", "𑻷", "𑻸", "𑿿", "𒑰", "𒑱", "𒑲", "𒑳", "𒑴", "𖩮", "𖩯", "𖫵", "𖬷", "𖬸", "𖬹", "𖬺", "𖬻", "𖭄", "𖺗", "𖺘", "𖺙", "𖺚", "𖿢", "𛲟", "𝪇", "𝪈", "𝪉", "𝪊", "𝪋", "𞥞", "𞥟", "\n", "\r", "\u{2029}", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", - " ", " " + " ", " ", "`" ]; #[rustfmt::skip]