From 6c259a42a6268ddaa609fc41ba913498b7fb83f8 Mon Sep 17 00:00:00 2001 From: Simon Laux Date: Thu, 9 Jan 2025 21:31:25 +0100 Subject: [PATCH] Don't consume/eat the exclamation mark at the end of a link (#85) * Don't consume/eat the exclamation mark at the end of a link closes #81 * add changelog entry --- CHANGELOG.md | 1 + src/parser/link_url/mod.rs | 2 +- src/parser/link_url/parse_link.rs | 2 +- .../exclamation_mark_at_end_of_link_81.rs | 97 +++++++++++++++++++ tests/based_on_issue/mod.rs | 1 + tests/text_to_ast/mod.rs | 10 +- 6 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 tests/based_on_issue/exclamation_mark_at_end_of_link_81.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b3c556..4ed2544 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased - fix: parse fediverse addresses as text, so they are not mistaken for email addresses ([issue #82](https://github.com/deltachat/message-parser/issues/82)) +- fix: don't consume/eat the exclamation mark at the end of a link #85 ## 0.11.0 - Bug fixes for Link Parsing diff --git a/src/parser/link_url/mod.rs b/src/parser/link_url/mod.rs index 9473ba2..f82a5ec 100644 --- a/src/parser/link_url/mod.rs +++ b/src/parser/link_url/mod.rs @@ -57,7 +57,7 @@ impl LinkDestination<'_> { pub fn parse_labelled(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> { let (mut remaining, mut link) = Self::parse(input)?; if let Some(first) = remaining.chars().next() { - if matches!(first, ';' | '.' 
| ',' | ':' | '!') { // ^ markdown labelled links can include one of these characters at the end // and it's therefore part of the link let point = link.target.len().saturating_add(1); diff --git a/src/parser/link_url/parse_link.rs b/src/parser/link_url/parse_link.rs index 74776fb..dc3635a 100644 --- a/src/parser/link_url/parse_link.rs +++ b/src/parser/link_url/parse_link.rs @@ -400,7 +400,7 @@ fn parse_iri(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> { .saturating_add(fragment.len()); // compute length of link which is ihier_len + scheme + query + fragment if let Some(link) = input_.get(0..len) { - if link.ends_with([':', ';', '.', ',']) { + if link.ends_with([':', ';', '.', ',', '!']) { len = len.saturating_sub(1); if path.is_empty() && query.is_empty() && fragment.is_empty() { host = input_.slice(scheme.len().saturating_add(3)..input_.len().saturating_sub(1)); diff --git a/tests/based_on_issue/exclamation_mark_at_end_of_link_81.rs b/tests/based_on_issue/exclamation_mark_at_end_of_link_81.rs new file mode 100644 index 0000000..e5c84b5 --- /dev/null +++ b/tests/based_on_issue/exclamation_mark_at_end_of_link_81.rs @@ -0,0 +1,97 @@ +use deltachat_message_parser::parser::Element::*; +use deltachat_message_parser::parser::{parse_desktop_set, parse_markdown_text, parse_only_text}; + +use crate::text_to_ast::https_link_no_puny; + +/// don't eat/consume the ! 
at the end of a link +/// as discussed in https://github.com/deltachat/message-parser/issues/81 + +#[test] +fn text_only() { + assert_eq!( + parse_only_text("This is an my site: https://delta.chat!"), + vec![ + Text("This is an my site: "), + Link { + destination: https_link_no_puny("https://delta.chat", "delta.chat",) + }, + Text("!") + ] + ); + assert_eq!( + parse_only_text("This is an my site: https://delta.chat#!test"), + vec![ + Text("This is an my site: "), + Link { + destination: https_link_no_puny("https://delta.chat#!test", "delta.chat",) + } + ] + ); +} + +#[test] +fn desktop_set() { + assert_eq!( + parse_desktop_set("This is an my site: https://delta.chat!"), + vec![ + Text("This is an my site: "), + Link { + destination: https_link_no_puny("https://delta.chat", "delta.chat",) + }, + Text("!") + ] + ); +} + +#[test] +fn desktop_set_negative() { + assert_eq!( + parse_desktop_set("This is an my site: https://delta.chat#!test"), + vec![ + Text("This is an my site: "), + Link { + destination: https_link_no_puny("https://delta.chat#!test", "delta.chat",) + } + ] + ); +} + +#[test] +fn markdown() { + assert_eq!( + parse_markdown_text("This is an my site: https://delta.chat!"), + vec![ + Text("This is an my site: "), + Link { + destination: https_link_no_puny("https://delta.chat", "delta.chat",) + }, + Text("!") + ] + ); +} +#[test] +fn markdown_negative() { + assert_eq!( + parse_markdown_text("This is an my site: https://delta.chat#!test"), + vec![ + Text("This is an my site: "), + Link { + destination: https_link_no_puny("https://delta.chat#!test", "delta.chat",) + } + ] + ); +} + +#[test] +fn still_take_whole_link_in_labled_links() { + assert_eq!( + parse_markdown_text("This is an my [site](https://delta.chat/!)"), + vec![ + Text("This is an my "), + LabeledLink { + label: vec![Text("site")], + destination: https_link_no_puny("https://delta.chat/!", "delta.chat",) + } + ] + ); +} diff --git a/tests/based_on_issue/mod.rs b/tests/based_on_issue/mod.rs index 
aff8e6c..219f856 100644 --- a/tests/based_on_issue/mod.rs +++ b/tests/based_on_issue/mod.rs @@ -1 +1,2 @@ +pub mod exclamation_mark_at_end_of_link_81; pub mod fediverse_handle_82; diff --git a/tests/text_to_ast/mod.rs b/tests/text_to_ast/mod.rs index 0d2c471..ab434c7 100644 --- a/tests/text_to_ast/mod.rs +++ b/tests/text_to_ast/mod.rs @@ -1,7 +1,7 @@ use deltachat_message_parser::parser::Element::*; use deltachat_message_parser::parser::LinkDestination; -fn gopher_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { +pub(crate) fn gopher_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { LinkDestination { target, hostname: Some(hostname), @@ -10,7 +10,7 @@ fn gopher_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestinatio } } -fn http_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { +pub(crate) fn http_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { LinkDestination { target, hostname: Some(hostname), @@ -19,7 +19,7 @@ fn http_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination< } } -fn ftp_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { +pub(crate) fn ftp_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { LinkDestination { target, hostname: Some(hostname), @@ -28,7 +28,7 @@ fn ftp_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<' } } -fn https_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { +pub(crate) fn https_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> { LinkDestination { target, hostname: Some(hostname), @@ -37,7 +37,7 @@ fn https_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination } } -fn mailto_link_no_puny(target: &str) -> LinkDestination<'_> { +pub(crate) fn mailto_link_no_puny(target: &str) -> LinkDestination<'_> { LinkDestination { target, hostname: None,