From 1730e51e60817fd98fc2041ffd01b702edc07fd2 Mon Sep 17 00:00:00 2001 From: rambip Date: Thu, 3 Aug 2023 22:50:36 +0200 Subject: [PATCH] moved parsing to its own crate and use custom Callback type --- Cargo.lock | 10 +- Cargo.toml | 4 +- examples/onclick/src/main.rs | 2 +- src/lib.rs | 35 ++-- src/parse/mod.rs | 321 ----------------------------------- src/parse/token.rs | 204 ---------------------- src/render.rs | 26 ++- src/utils.rs | 23 ++- 8 files changed, 61 insertions(+), 564 deletions(-) delete mode 100644 src/parse/mod.rs delete mode 100644 src/parse/token.rs diff --git a/Cargo.lock b/Cargo.lock index c7af14e..562eac9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -894,7 +894,7 @@ dependencies = [ "katex", "leptos", "log", - "pulldown-cmark", + "pulldown-cmark-wikilink", "syntect", "wasm-bindgen", "wasm-logger", @@ -1419,6 +1419,14 @@ dependencies = [ "unicase", ] +[[package]] +name = "pulldown-cmark-wikilink" +version = "0.1.0" +source = "git+https://github.com/rambip/pulldown-cmark-wikilink#61615998db86b4286d72d1a34552789a4a972893" +dependencies = [ + "pulldown-cmark", +] + [[package]] name = "quick-xml" version = "0.29.0" diff --git a/Cargo.toml b/Cargo.toml index 9434dc5..606447d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "leptos-markdown" -version = "0.2.0" +version = "0.3.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -8,7 +8,7 @@ edition = "2021" [dependencies] leptos = {version="0.4.6", features=["csr", "nightly"]} -pulldown-cmark = { git = "https://github.com/rhysd/pulldown-cmark.git", branch="math"} +pulldown-cmark-wikilink = {git="https://github.com/rambip/pulldown-cmark-wikilink"} syntect = { version = "5.0.0", default-features = false, features = ["default-fancy"]} katex = {version="0.4", default-features=false, features=["wasm-js"]} diff --git a/examples/onclick/src/main.rs b/examples/onclick/src/main.rs index 8662c6e..3da330a 100644 --- a/examples/onclick/src/main.rs +++ b/examples/onclick/src/main.rs @@ -35,7 +35,7 @@ fn App(cx: Scope) -> impl IntoView { view!{cx,
- +

{"markdown source:"}

diff --git a/src/lib.rs b/src/lib.rs index dc3e511..eb045b7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,3 @@ -#![feature(slice_group_by)] -#![feature(array_into_iter_constructors)] - use leptos::*; use leptos::html::AnyElement; @@ -11,10 +8,10 @@ pub use render::HtmlError; use web_sys::MouseEvent; -mod parse; -use parse::{parse, default_options}; +use pulldown_cmark_wikilink::{Parser, Options, LinkType}; mod utils; +use utils::Callback; use core::ops::Range; @@ -32,7 +29,7 @@ pub struct LinkDescription { pub title: String, /// the type of link - pub link_type: pulldown_cmark::LinkType, + pub link_type: LinkType, /// wether the link is an image pub image: bool, @@ -50,6 +47,7 @@ pub struct MarkdownMouseEvent { // pub tag: pulldown_cmark::Tag<'a>, } + #[component] pub fn Markdown( cx: Scope, @@ -61,12 +59,12 @@ pub fn Markdown( /// the callback called when a component is clicked. /// if you want to controll what happens when a link is clicked, /// use [`render_links`][render_links] - #[prop(optional)] - on_click: Option>, + #[prop(optional, into)] + on_click: Option>, /// - #[prop(optional)] - render_links: Option + #[prop(optional, into)] + render_links: Option, HtmlError>>>, /// the name of the theme used for syntax highlighting. @@ -82,8 +80,8 @@ pub fn Markdown( /// modify parse options. /// It take the default parse options and returns the options you want to enanble. /// For wikilinks, see the `wikilinks` prop. - #[prop(optional)] - parse_options: Option pulldown_cmark::Options>>, + #[prop(optional, into)] + parse_options: Option>, ) -> impl IntoView { @@ -91,21 +89,22 @@ pub fn Markdown( cx, theme, on_click, - render_links + render_links, ); let options = match parse_options { - Some(f) => f(default_options()), - None => default_options() + Some(f) => f.call(Options::all()), + None => Options::all(), }; view! {cx,
{move || src.with( |x| { - let stream = parse(x, &options, wikilinks); - log!("{stream:?}"); - Renderer::new(&context, &stream).collect_view(cx) + let stream : Vec<_> = Parser::new_ext(x, options, wikilinks) + .into_offset_iter() + .collect(); + Renderer::new(&context, &stream).collect_view(cx) }) }
diff --git a/src/parse/mod.rs b/src/parse/mod.rs deleted file mode 100644 index dad0766..0000000 --- a/src/parse/mod.rs +++ /dev/null @@ -1,321 +0,0 @@ -use pulldown_cmark::{Options, Parser, Event, Tag, TagEnd, LinkType}; -use core::ops::Range; - -mod token; -use token::{Lexer, Token}; - -use Token::*; - -use core::iter::Peekable; - - -pub fn default_options() -> Options { - let mut r = Options::all(); - r.set(Options::ENABLE_FOOTNOTES, false); - r -} - - -/// regroups adjacents text events. -/// if the events are [Text("a"), Text("b"), Link], the result will be -/// [Text("ab"), Link] -fn group_text<'a>(source: &'a str, events: Vec<(Event<'a>, Range)>) --> Vec<(Event<'a>, Range)> { - let mut result = Vec::with_capacity(events.len()); - - let mut last_text_range: Option> = None; - - for (t, r) in events { - last_text_range = match (t, std::mem::take(&mut last_text_range)) { - (Event::Text(_), Some(last_range)) => { - Some(last_range.start..r.end) - }, - (Event::Text(_), None) => { - Some(r) - }, - (t, Some(last_range)) => { - result.push((Event::Text(source[last_range.clone()].into()), last_range.clone())); - result.push((t, r)); - None - }, - (t, None) => { - result.push((t, r)); - None - } - } - } - - result -} - - -/// `parse(s, options, wikilinks)` returns a vector of [`Events`][pulldown_cmark::Event] -/// it adds inline wikilinks if the `wikilinks` flag is set to `true` -pub fn parse<'a>(source: &'a str, parse_options: &Options, wikilinks: bool) --> Vec<(Event<'a>, Range)>{ - let events = Parser::new_ext(source, parse_options.to_owned()) - .into_offset_iter() - .collect::>(); - - let events_grouped = group_text(source, events); - - if !wikilinks { - return events_grouped - } - - let mut result = Vec::new(); - - for item in events_grouped { - match item { - (Event::Text(_), r) => result.extend(MyParser::new_at(source, r)), - _ => result.push(item) - } - } - - result -} - -type PeekTokenStream<'a> = Peekable>; - -struct MyParser<'a> { - source: &'a str, - tokens: PeekTokenStream<'a>, - buffer: std::array::IntoIter<(Event<'a>, Range), 3> -} - - -enum ParseError { - Empty, - ReParse(Range) -} - -impl ParseError { - /// `error.extend_before(start..end)` returns a new error - /// that spans from start to the end of the error - /// (either end, either the original error end) - fn extend_before(self, r: Range) -> ParseError { - match self { - Self::Empty => Self::ReParse(r), - Self::ReParse(r2) => Self::ReParse(r.start..r2.end) - } - } -} - - -impl<'a> MyParser<'a> { - fn new_at(source: &'a str, position: Range) -> Self { - let text = &source[position.clone()]; - Self { - source, - tokens: Lexer::new_at(&text, position.start).peekable(), - buffer: std::array::IntoIter::empty(), - } - } - - /// in `[[url|link]]`, returns `url` and don't consume the `|` - fn parse_wikilink_first_field(&mut self) -> Result, ParseError> { - let start : usize = match self.tokens.peek(){ - Some((_, x)) => x.start, - None => return Err(ParseError::Empty) - }; - let mut end: usize = start.clone(); - loop { - match self.tokens.peek() { - Some((Pipe, _))| Some((RRBra, _)) => break Ok(start..end), - Some((_, _)) => { - end = self.tokens.next().unwrap().1.end; - } - None => return Err(ParseError::ReParse(start..end)), - } - } - } - - /// in `link]]`, returns `link` and don't consume the `]]` - fn parse_wikilink_alias(&mut self) -> Result, ParseError>{ - let start : usize = match self.tokens.peek(){ - Some((_, x)) => x.start.clone(), - None => return Err(ParseError::Empty) - }; - let mut end: usize = start.clone(); - loop { - match self.tokens.peek() { - Some((RRBra, _)) => return Ok(start..end), - Some((_, _)) => { - end = self.tokens.next().unwrap().1.end; - } - None => return Err(ParseError::ReParse(start..end)), - } - } - } - - /// parse an entire wikilink, ie one of - /// - `[[a shortcut url]]` - /// - `[[a url|with some displayed content]]` - fn parse_wikilink(&mut self) -> Result<[(Event<'a>, Range); 3], ParseError> { - let tag_pos = self.tokens.next().unwrap().1; - let url_pos = self.parse_wikilink_first_field() - .map_err(|x| x.extend_before(tag_pos.clone()))?; - - let opening_tag = Event::Start(Tag::Link( - LinkType::Inline, - self.source[url_pos.clone()].into(), - "wiki".into(), - )); - - let closing_tag = Event::End(TagEnd::Link); - - match self.tokens.next() { - Some((RRBra, x)) => { - Ok([ - (opening_tag, tag_pos.start..x.end), - (Event::Text(self.source[url_pos.clone()].into()), url_pos), - (closing_tag, tag_pos.start..x.end), - ]) - }, - Some((Pipe, _)) => { - let alias_pos = self.parse_wikilink_alias() - .map_err(|x| x.extend_before(tag_pos.clone()))?; - - let end = self.tokens.next().unwrap().1.end; - Ok([ - (opening_tag, tag_pos.start..end), - (Event::Text(self.source[alias_pos.clone()].into()), alias_pos), - (closing_tag, tag_pos.start..end), - ]) - } - _ => unreachable!() - } - } - - // parse a text until the first `[[` (start of wikilink) is encountered. - // don't consume the `[[` - fn parse_text(&mut self) -> Range { - let start = self.tokens.peek().unwrap().1.start.clone(); - let mut end = start.clone(); - loop { - match self.tokens.peek() { - Some((LLBra, _)) | None => return start..end, - Some((_, _)) => { - end = self.tokens.next().unwrap().1.end; - } - } - } - } -} - - -impl<'a> Iterator for MyParser<'a> { - type Item = (Event<'a>, Range); - fn next(&mut self) -> Option { - while let Some((x, r)) = self.buffer.next() { - return Some((x.clone(), r.clone())) - }; - match self.tokens.peek()? { - (LLBra, x) => { - let _start = x.start.clone(); - match self.parse_wikilink() { - Ok(b) => { - self.buffer = b.into_iter(); - self.next() - }, - Err(e) => { - let r = match e { - ParseError::ReParse(r) => r, - _ => unreachable!(), - }; - Some((Event::Text(self.source[r.clone()].into()), r)) - } - } - }, - (NewLine, _) => self.next(), - _ => { - let r = self.parse_text(); - Some((Event::Text(self.source[r.clone()].into()), r)) - } - } - } -} - -#[cfg(test)] -mod tests { - use wasm_test::*; - use super::*; - use pulldown_cmark::TagEnd; - - use Event::*; - use LinkType::*; - - #[wasm_test] - fn test_offset(){ - let s = "12345"; - let _parser = MyParser::new_at(s, 0..5); - } - - #[wasm_test] - fn parse_wikilink_no_alias() { - let s = "here is a wikilink: [[link]]"; - let tokens : Vec<_> = Lexer::new_at(s, 0).collect(); - println!("{tokens:?}"); - let events: Vec<_> = - MyParser::new_at(s, 0..28) - .collect(); - println!("{events:?}"); - assert_eq!(events, vec![ - (Text("here is a wikilink: ".into()), 0..20), - (Start(Tag::Link(Inline, "link".into(), "wiki".into())), 20..28), - (Text("link".into()), 22..26), - (End(TagEnd::Link), 20..28) - ]); - } - - #[wasm_test] - fn parse_wikilink_alias(){ - let s = "[[the url| with a strange content |😈| inside]]"; - - let events: Vec<_> = - MyParser::new_at(s, 0..s.len()) - .map(|(t, _)| t) - .collect(); - - println!("{events:?}"); - assert_eq!( - events, - vec![ - Start(Tag::Link(Inline, "the url".into(), "wiki".into())), - Text(" with a strange content |😈| inside".into()), - End(TagEnd::Link)] - ); - } - - #[wasm_test] - fn parse(){ - let s = "[[the url| with a strange content |😈| inside]]"; - - let events: Vec<_> = - parse(s, &default_options(), true) - .into_iter() - .map(|(t, _)| t) - .collect(); - - println!("{events:?}"); - assert_eq!( - events, - vec![ - Start(Tag::Paragraph), - Start(Tag::Link(Inline, "the url".into(), "wiki".into())), - Text(" with a strange content |😈| inside".into()), - End(TagEnd::Link), - End(TagEnd::Paragraph), - ] - ); - } - - #[wasm_test] - fn group_by() { - let slice = &[1, 2, 3, 1, 1, 1, 2, 1]; - let groups: Vec<_> = slice.group_by(|a,b| *a==1 && *b == 1) - .collect(); - - assert_eq!(groups, vec![vec![1], vec![2], vec![3], vec![1, 1, 1], vec![2], vec![1]]) - } -} diff --git a/src/parse/token.rs b/src/parse/token.rs deleted file mode 100644 index 27a0082..0000000 --- a/src/parse/token.rs +++ /dev/null @@ -1,204 +0,0 @@ -use core::ops::Range; - -#[derive(Debug, PartialEq)] -pub enum Token { - Pipe, - RBra, - LBra, - RRBra, - LLBra, - Word, - NewLine, -} - -use Token::*; - -/// possible states of the state machine. -/// This implementation is almost a pure DFA -enum State { - Default, - AfterPipe, - AfterOpen1, - AfterOpen2, - AfterOpen3, - AfterClose1, - AfterClose2, - AfterClose3, - AfterSymbol, - AfterReturn, -} - -impl Default for State { - fn default() -> Self { - State::Default - } -} - -impl State { - /// `s.finalize()` returns the extra token that would be returned - /// if the stream ended in the state `s` - fn finalize(self: State) -> Option { - use State::*; - - Some(match self { - AfterPipe => Pipe, - AfterOpen1 => LBra, - AfterOpen2 => LLBra, - AfterOpen3 => LBra, - AfterClose1 => RBra, - AfterClose2 => RRBra, - AfterClose3 => RBra, - AfterSymbol => Word, - AfterReturn => NewLine, - Default => return None, - }) - } -} - - -pub struct Lexer<'a> { - /// the state of the automata - state: State, - - /// the stream of characacters to parse - source: core::str::Chars<'a>, - - /// the current position inside the original slice of text. - /// Each time a char `c` is read, the cursor increase by the utf8 size of `c` - cursor: usize, - - /// last time a token was returned - last_token_end: usize, -} - -impl<'a> Lexer<'a> { - pub fn new_at(source: &'a str, index: usize) -> Lexer<'a> { - Lexer { - source: source.chars(), - cursor: index, - state: State::Default, - last_token_end: index, - } - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = (Token, Range); - - fn next(&mut self) -> Option { - use State::*; - - for c in self.source.by_ref() { - - let state = std::mem::take(&mut self.state); - - let (new_state, state_to_finalize) = match (c, state) { - ('\r', s) => (s, None), - ('\n', s) => (AfterReturn, Some(s)), - ('[', AfterOpen1) => (AfterOpen2, None), - ('[', s@AfterOpen2) => (AfterOpen3, Some(s)), - ('[', s@AfterOpen3) => (AfterOpen3, Some(s)), - ('[', s) => (AfterOpen1, Some(s)), - (']', AfterClose1) => (AfterClose2, None), - (']', s@AfterClose2)=> (AfterClose3, Some(s)), - (']', s@AfterClose3)=> (AfterClose3, Some(s)), - (']', s) => (AfterClose1, Some(s)), - ('|', s) => (AfterPipe, Some(s)), - (_, AfterSymbol) => (AfterSymbol, None), - (_, s) => (AfterSymbol, Some(s)) - - }; - - self.state = new_state; - - let last_cursor = self.cursor.clone(); - self.cursor += c.len_utf8(); - - if let Some(t) = state_to_finalize.and_then(|x| x.finalize()) { - - let position = Range { - end: last_cursor, - start: std::mem::replace(&mut self.last_token_end, last_cursor), - }; - - return Some((t, position)); - } - } - - if let Some(t) = std::mem::take(&mut self.state).finalize() { - let position = Range { - start: std::mem::replace(&mut self.last_token_end, self.cursor.clone()), - end: self.cursor, - }; - return Some((t, position)); - } - None - } -} - -#[cfg(test)] -mod tests { - use wasm_test::*; - use super::*; - - #[wasm_test] - fn test_stream(){ - let source = "[abc] [[ d e]]\nb"; - let stream: Vec = Lexer::new_at(source, 0) - .map(|(t, _)| t) - .collect(); - println!("{stream:?}"); - assert_eq!(stream, - vec![ - LBra, - Word, - RBra, - Word, - LLBra, - Word, - RRBra, - NewLine, - Word, - ] - ); - } - - #[wasm_test] - fn test_stream_double_bracket(){ - let source = "[[["; - let stream: Vec<(Token, _)> - = Lexer::new_at(source, 0).collect(); - - println!("{stream:?}"); - assert_eq!(stream, - vec![ - (LLBra, 0..2), - (LBra, 2..3), - ] - ); - } - - #[wasm_test] - fn lexer_emoji(){ - let source = "[[the url| with a strange content |😈| inside]]"; - let stream : Vec<_> = - Lexer::new_at(source, 0) - .map(|(token, range)| (token, &source[range])) - .collect(); - - println!("{stream:?}"); - assert_eq!(stream, - vec![ - (LLBra, "[["), - (Word, "the url"), - (Pipe, "|"), - (Word, " with a strange content "), - (Pipe, "|"), - (Word, "😈"), - (Pipe, "|"), - (Word, " inside"), - (RRBra, "]]") - ]); - } -} - diff --git a/src/render.rs b/src/render.rs index 8b8e280..3c310bd 100644 --- a/src/render.rs +++ b/src/render.rs @@ -1,7 +1,6 @@ use leptos::*; use leptos::html::AnyElement; -use std::rc::Rc; use core::ops::Range; use katex; @@ -10,9 +9,9 @@ use syntect::highlighting::{ThemeSet, Theme}; use web_sys::MouseEvent; -use pulldown_cmark::{Event, Tag, CodeBlockKind, Alignment, MathDisplay, HeadingLevel}; +use pulldown_cmark_wikilink::{Event, Tag, CodeBlockKind, Alignment, MathDisplay, HeadingLevel}; -use crate::utils::as_closing_tag; +use crate::utils::{as_closing_tag, Callback}; use super::{LinkDescription, MarkdownMouseEvent}; type Html = HtmlElement; @@ -27,7 +26,7 @@ pub fn make_callback(context: &RenderContext, position: Range) mouse_event: x, position: position.clone() }; - onclick(click_event) + onclick.call(click_event) } } @@ -43,18 +42,18 @@ pub struct RenderContext { theme: Theme, /// callback to add interactivity to the rendered markdown - onclick: Rc, + onclick: Callback, /// callback used to render links - render_links: Option Result>>, + render_links: Option>>, } impl RenderContext { pub fn new(cx: Scope, theme_name: Option, - onclick: Option>, - render_links: Option Result>>) + onclick: Option>, + render_links: Option>>) -> Self { let theme_set = ThemeSet::load_defaults(); @@ -66,16 +65,11 @@ impl RenderContext let syntax_set = SyntaxSet::load_defaults_newlines(); - let onclick : Rc_> = match onclick { - Some(x) => Rc::new(x), - None => Rc::new(|_| ()) - }; - RenderContext { cx, syntax_set, theme, - onclick, + onclick: onclick.unwrap_or(Callback::new(|_| ())), render_links, } } @@ -283,7 +277,7 @@ fn render_tasklist_marker(context: &RenderContext, m: bool, position: Range @@ -419,7 +413,7 @@ fn render_link(context: &RenderContext, link: LinkDescription) { let cx = context.cx; match (&context.render_links, link.image) { - (Some(f), _) => f(link), + (Some(f), _) => f.call(link), (None, false) => Ok(view!{cx, {link.content} diff --git a/src/utils.rs b/src/utils.rs index 7572cde..7b662de 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,5 @@ -use pulldown_cmark::{Tag, TagEnd}; +use pulldown_cmark_wikilink::{Tag, TagEnd}; +use std::rc::Rc; pub fn as_closing_tag(t: &Tag) -> TagEnd { match t { @@ -21,3 +22,23 @@ pub fn as_closing_tag(t: &Tag) -> TagEnd { Tag::MetadataBlock(k) => TagEnd::MetadataBlock(*k), } } + +#[derive(Clone)] +pub struct Callback(Rc Out>); + +impl Callback { + pub fn new B + 'static>(f: F) -> Self { + Callback(Rc::new(f)) + } + + pub fn call(&self, value: A) -> B { + self.0(value) + } +} + +impl From for Callback +where F: Fn(A) -> B + 'static { + fn from(value: F) -> Callback { + Callback(Rc::new(value)) + } +}