From 6f16d665b602fbab346bb520134d75deee3734ac Mon Sep 17 00:00:00 2001 From: Farooq Karimi Zadeh Date: Tue, 12 Mar 2024 18:07:26 +0330 Subject: [PATCH] fix some problems with is_in_one_of_ranges and updating the code: --- src/parser/parse_from_text/find_range.rs | 5 +- .../hashtag_content_char_ranges.rs | 48 +++++++++---------- src/parser/parse_from_text/link_element.rs | 19 ++++++-- .../parse_from_text/markdown_elements.rs | 4 +- src/parser/parse_from_text/text_elements.rs | 4 +- 5 files changed, 45 insertions(+), 35 deletions(-) diff --git a/src/parser/parse_from_text/find_range.rs b/src/parser/parse_from_text/find_range.rs index b8a74bd..3090ed5 100644 --- a/src/parser/parse_from_text/find_range.rs +++ b/src/parser/parse_from_text/find_range.rs @@ -39,11 +39,10 @@ fn find_range_for_char<'a>(code: u32, ranges: &'a [RangeInclusive]) -> Find /// /// # Arguments /// -/// - `c` A character +/// - `c` A number(u32) /// /// - `ranges` A sorted slice of ranges to see if `c` is in anyone of them -pub fn is_in_one_of_ranges(c: char, ranges: &[RangeInclusive]) -> bool { - let c = c as u32; +pub fn is_in_one_of_ranges(c: u32, ranges: &[RangeInclusive]) -> bool { match find_range_for_char(c, ranges) { FindRangeResult::WasOnRangeStart => true, FindRangeResult::Range(range) => range.contains(&c), diff --git a/src/parser/parse_from_text/hashtag_content_char_ranges.rs b/src/parser/parse_from_text/hashtag_content_char_ranges.rs index 1c934fe..642943d 100644 --- a/src/parser/parse_from_text/hashtag_content_char_ranges.rs +++ b/src/parser/parse_from_text/hashtag_content_char_ranges.rs @@ -876,41 +876,39 @@ pub(crate) fn hashtag_content_char(c: char) -> bool { } else if matches!(c, '+' | '-' | '_') { true } else { - is_in_one_of_ranges(c, &HASHTAG_CONTENT_CHAR_RANGES[..]) + is_in_one_of_ranges(c as u32, &HASHTAG_CONTENT_CHAR_RANGES[..]) } } #[cfg(test)] mod test { use crate::parser::parse_from_text::hashtag_content_char_ranges::hashtag_content_char; - - use super::{find_range_for_char, FindRangeResult, RangeInclusive}; + use crate::parser::parse_from_text::find_range::is_in_one_of_ranges; + use std::ops::RangeInclusive; #[test] fn test_range_function() { - // these must return WasOnRangeStart - let codes: Vec = vec![0x30000, 0xe0100, 0x23, 0x30, 0x171f, 0x176e, 0x10fb0]; - for code in codes.iter() { - assert_eq!(find_range_for_char(*code), FindRangeResult::WasOnRangeStart); - } - - // these must be return associated ranges - let codes: Vec<(u32, RangeInclusive)> = vec![ - (0x11066 + 5, 0x11066..=0x11075), // in range - (0x11000 + 10, 0x11000..=0x11046), // in range - (0x11046 + 2, 0x11000..=0x11046), // out of range - (0x10, 0x23..=0x23), - (0x09, 0x23..=0x23), - (0x0, 0x23..=0x23), - (0x25, 0x23..=0x23), - (0x2a + 1, 0x2a..=0x2a), - (0xfffff, 0xe0100..=0xe01ef), - // ^ this is beyond ranges and must return the - // last range + let ranges: [RangeInclusive; 5] = [ + 0x0..=0x30, + 0x99..=0x99, + 0x1f..=0x2f, + 0xff..=0xff, + 0x1000f..=0x20000, ]; - - for (code, range) in codes.iter() { - assert_eq!(find_range_for_char(*code), FindRangeResult::Range(range)); + let codes: Vec<(u32, bool)> = vec![ + (0x30000, false), + (0x01, true), + (0x23, true), + (0x30, false), + (0x171f, false), + (0x176e, false), + (0x10fb0, true), + (0x0, true), + (0xf1, false) + ]; + for (code, result) in codes.iter() { + assert_eq!(is_in_one_of_ranges(*code, &ranges[..]), *result); + println!("{code}, {result}"); } } diff --git a/src/parser/parse_from_text/link_element.rs b/src/parser/parse_from_text/link_element.rs index 9b18a37..d8bff04 100644 --- a/src/parser/parse_from_text/link_element.rs +++ b/src/parser/parse_from_text/link_element.rs @@ -53,7 +53,7 @@ const UCSCHAR_RANGES: [RangeInclusive; 17] = [ ]; fn is_ucschar(c: char) -> bool { - is_in_one_of_ranges(c, &UCSCHAR_RANGES[..]) + is_in_one_of_ranges(c as u32, &UCSCHAR_RANGES[..]) } fn is_unreserved(c: char) -> bool { @@ -276,7 +276,7 @@ const IPRIVATE_RANGES: [RangeInclusive; 3] = [0xe000..=0xf8ff, 0xf0000..=0xffffd, 0x100000..=0x10fffd]; fn is_iprivate(c: char) -> bool { - is_in_one_of_ranges(c, &IPRIVATE_RANGES[..]) + is_in_one_of_ranges(c as u32, &IPRIVATE_RANGES[..]) } fn is_iquery_not_pct_encoded(c: char) -> bool { @@ -349,7 +349,7 @@ fn get_puny_code_warning(link: &str, host: &str) -> Option { } } -pub fn link(input: &str) -> IResult<&str, Element, CustomError<&str>> { +fn parse_iri(input: &str) -> IResult<&str, Element, CustomError<&str>> { let input_ = <&str>::clone(&input); let (input, scheme) = scheme(input)?; let (input, (ihier, host, is_ipv6_or_future)) = ihier_part(input)?; @@ -371,3 +371,16 @@ pub fn link(input: &str) -> IResult<&str, Element, CustomError<&str>> { }, )) } + +fn parse_irelative_ref(input: &str) -> IResult<&str, Element, CustomError<&str>> { + todo!() +} + +pub fn parse_link(input: &str) -> IResult<&str, Element, CustomError<&str>> { + /* + match parse_iri(input) { + Ok((input, iri)) => Ok((input, iri)), + Err(..) => parse_irelative_ref(input), + }*/ + parse_iri(input) +} diff --git a/src/parser/parse_from_text/markdown_elements.rs b/src/parser/parse_from_text/markdown_elements.rs index 5dd2e61..7123c21 100644 --- a/src/parser/parse_from_text/markdown_elements.rs +++ b/src/parser/parse_from_text/markdown_elements.rs @@ -7,7 +7,7 @@ use super::text_elements::parse_text_element; use super::Element; use super::{base_parsers::*, parse_all}; use crate::parser::link_url::LinkDestination; -use crate::parser::parse_from_text::link_element::link; +use crate::parser::parse_from_text::link_element::parse_link; ///! nom parsers for markdown elements use nom::{ bytes::complete::{is_not, tag, take, take_while}, @@ -111,7 +111,7 @@ pub(crate) fn delimited_link(input: &str) -> IResult<&str, Element, CustomError< return Err(nom::Err::Failure(CustomError::)); } };*/ - let (rest, link) = link(input)?; + let (rest, link) = parse_link(input)?; if !rest.is_empty() { return Err(nom::Err::Error(CustomError::UnexpectedContent)); } diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs index a04d040..27e5b97 100644 --- a/src/parser/parse_from_text/text_elements.rs +++ b/src/parser/parse_from_text/text_elements.rs @@ -4,7 +4,7 @@ use crate::parser::link_url::LinkDestination; use super::base_parsers::CustomError; use super::base_parsers::*; use super::hashtag_content_char_ranges::hashtag_content_char; -use super::link_element::link; +use super::link_element::parse_link; use super::Element; use crate::nom::{Offset, Slice}; use nom::bytes::complete::take_while; @@ -280,7 +280,7 @@ pub(crate) fn parse_text_element( Ok((i, elm)) } else if let Ok((i, elm)) = email_address(input) { Ok((i, elm)) - } else if let Ok((i, elm)) = link(input) { + } else if let Ok((i, elm)) = parse_link(input) { Ok((i, elm)) } else if let Ok((i, _)) = linebreak(input) { Ok((i, Element::Linebreak))