From 6274fe380df14287fc2a7ccf38cb69b27b7a2fed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 16 Jan 2024 22:41:53 +0000 Subject: [PATCH] markdown: parse fenced_code_attributes extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pandoc supports this extension: https://pandoc.org/MANUAL.html#extension-fenced_code_attributes ``` {#identifier .language attr="value"} ``` And this: ``` language {#identifier attr="value"} ``` Recognize them in the lexer. Try to limit the complexity of the regular expression by splitting off parsing of attributes into a separate 'parse' (otherwise we hit automata size limits in `ocamllex`). According to https://quarto.org/docs/authoring/markdown-basics.html#ordering-of-attributes the ordering has to be: * #identifiers * .classes * key-value attributes Signed-off-by: Edwin Török --- lib/block.ml | 62 +++++++++++++++---- lib/block.mli | 3 + lib/lexer_mdx.mll | 10 +-- lib/mli_parser.ml | 2 +- .../mdx-test/expect/attributes/test-case.md | 33 ++++++++++ .../expect/attributes/test-case.md.expected | 33 ++++++++++ test/bin/mdx-test/expect/dune.inc | 12 ++++ test/lib/test_block.ml | 1 + test/lib/test_dep.ml | 1 + 9 files changed, 141 insertions(+), 16 deletions(-) create mode 100644 test/bin/mdx-test/expect/attributes/test-case.md create mode 100644 test/bin/mdx-test/expect/attributes/test-case.md.expected diff --git a/lib/block.ml b/lib/block.ml index aa972fe4c..0865b7e25 100644 --- a/lib/block.ml +++ b/lib/block.ml @@ -66,11 +66,23 @@ module Raw = struct contents : string list; label_cmt : string option; legacy_labels : string; + attributes : string option; errors : Output.t list; } - let make ~loc ~section ~header ~contents ~label_cmt ~legacy_labels ~errors = - Any { loc; section; header; contents; label_cmt; legacy_labels; errors } + let make ~loc ~section ~header ~contents ~label_cmt ~legacy_labels ~attributes + ~errors = + Any + { + loc; + section; + header; + contents; + label_cmt; + legacy_labels; + attributes; + errors; + } let make_include ~loc ~section ~labels = Include { loc; section; labels } end @@ -114,6 +126,7 @@ type t = { os_type_enabled : bool; set_variables : (string * string) list; unset_variables : string list; + attributes : string list; delim : string option; value : value; } @@ -263,7 +276,7 @@ let pp_header ?syntax ppf t = Fmt.(option string) t.delim pp_lang_header lang_headers pp_labels other_labels | Some Syntax.Cram -> pp_labels ?syntax ppf t.labels - | Some Syntax.Markdown | None -> + | Some Syntax.Markdown | None -> ( if t.legacy_labels then Fmt.pf ppf "```%a%a" Fmt.(option Header.pp) @@ -271,7 +284,10 @@ let pp_header ?syntax ppf t = else Fmt.pf ppf "%a```%a" (pp_labels ?syntax) t.labels Fmt.(option Header.pp) - (header t) + (header t); + match t.attributes with + | [] | [ "" ] -> () + | attrs -> Fmt.pf ppf " {%a}" Fmt.(string |> list ~sep:(any " ")) attrs) let pp ?syntax ppf b = pp_header ?syntax ppf b; @@ -459,7 +475,8 @@ let infer_block ~loc ~config ~header ~contents ~errors = let+ () = check_no_errors ~loc errors in Raw { header }) -let mk ~loc ~section ~labels ~legacy_labels ~header ~delim ~contents ~errors = +let mk ~loc ~section ~labels ~legacy_labels ~header ~delim ~contents ~attributes + ~errors = let block_kind = get_label (function Block_kind x -> Some x | _ -> None) labels in @@ -486,6 +503,7 @@ let mk ~loc ~section ~labels ~legacy_labels ~header ~delim ~contents ~errors = os_type_enabled; set_variables = config.set_variables; unset_variables = config.unset_variables; + attributes; delim; value; } @@ -495,7 +513,7 @@ let mk_include ~loc ~section ~labels = | Some file_inc -> let header = Header.infer_from_file file_inc in mk ~loc ~section ~labels ~legacy_labels:false ~header ~contents:[] - ~errors:[] ~delim:None + ~errors:[] ~delim:None ~attributes:[] | None -> label_required ~loc ~label:"file" ~kind:"include" let parse_labels ~label_cmt ~legacy_labels = @@ -513,15 +531,37 @@ let from_raw raw = | Raw.Include { loc; section; labels } -> let* labels = locate_errors ~loc (Label.of_string labels) in Util.Result.to_error_list @@ mk_include ~loc ~section ~labels - | Raw.Any { loc; section; header; contents; label_cmt; legacy_labels; errors } - -> - let header = Header.of_string header in + | Raw.Any + { + loc; + section; + header; + contents; + label_cmt; + legacy_labels; + attributes; + errors; + } -> + let attributes = + String.split_on_char ' ' (Option.value ~default:"" attributes) + in + let attr_classes = + attributes |> List.filter @@ String.starts_with ~prefix:"." + in + let header, attributes = + match (Header.of_string header, attr_classes) with + | None, lang :: _ -> + ( lang |> Astring.String.with_range ~first:1 |> Header.of_string, + List.filter (fun a -> a <> lang) attributes ) + | (Some _ as some), _ -> (some, attributes) + | None, [] -> (None, attributes) + in let* labels, legacy_labels = locate_errors ~loc (parse_labels ~label_cmt ~legacy_labels) in Util.Result.to_error_list - @@ mk ~loc ~section ~header ~contents ~labels ~legacy_labels ~errors - ~delim:None + @@ mk ~loc ~section ~header ~contents ~labels ~legacy_labels ~attributes + ~errors ~delim:None let is_active ?section:s t = let active = diff --git a/lib/block.mli b/lib/block.mli index b023265df..2ec2903f8 100644 --- a/lib/block.mli +++ b/lib/block.mli @@ -84,6 +84,7 @@ module Raw : sig contents:string list -> label_cmt:string option -> legacy_labels:string -> + attributes:string option -> errors:Output.t list -> t @@ -105,6 +106,7 @@ type t = { (** Whether the current os type complies with the block's version. *) set_variables : (string * string) list; unset_variables : string list; + attributes : string list; delim : string option; value : value; } @@ -118,6 +120,7 @@ val mk : header:Header.t option -> delim:string option -> contents:string list -> + attributes:string list -> errors:Output.t list -> (t, [ `Msg of string ]) result diff --git a/lib/lexer_mdx.mll b/lib/lexer_mdx.mll index 683abc818..26388fc0b 100644 --- a/lib/lexer_mdx.mll +++ b/lib/lexer_mdx.mll @@ -19,7 +19,9 @@ rule text section = parse newline lexbuf; `Section section :: text (Some section) lexbuf } | ( "" ws* eol? )? - "```" ([^' ' '\n']* as header) ws* ([^'\n']* as legacy_labels) eol + "```" ([^' ' '=' '{' '\n']* as header) ws* ([^'{' '\n']* as legacy_labels) ws* + ('{' ([^'\n']* as attributes) '}')? ws* + eol { let start = Lexing.lexeme_start_p lexbuf in newline lexbuf; (match label_cmt with @@ -42,7 +44,7 @@ rule text section = parse let loc = loc ~start ~end_ in let block = Block.Raw.make ~loc ~section ~header ~contents ~label_cmt - ~legacy_labels ~errors + ~legacy_labels ~attributes ~errors in `Block block :: text section lexbuf } | "" ws* eol @@ -85,7 +87,7 @@ and cram_text section = parse let loc = loc ~start ~end_ in let block = Block.Raw.make ~loc ~section ~header ~contents ~label_cmt - ~legacy_labels ~errors:[] + ~legacy_labels ~errors:[] ~attributes:None in `Block block :: (if requires_empty_line then `Text "\n" :: rest else rest) } @@ -101,7 +103,7 @@ and cram_text section = parse let rest = cram_text section lexbuf in let block = Block.Raw.make ~loc ~section ~header ~contents ~label_cmt - ~legacy_labels ~errors:[] + ~legacy_labels ~errors:[] ~attributes:None in `Block block :: (if requires_empty_line then `Text "\n" :: rest else rest) } diff --git a/lib/mli_parser.ml b/lib/mli_parser.ml index e8a977663..accea48c4 100644 --- a/lib/mli_parser.ml +++ b/lib/mli_parser.ml @@ -151,7 +151,7 @@ let make_block code_block file_contents = let delim = code_block.delimiter in let contents = slice code_block.content |> String.split_on_char '\n' in Block.mk ~loc:code_block.code_block ~section:None ~labels ~header - ~contents ~legacy_labels:false ~errors:[] ~delim + ~contents ~legacy_labels:false ~errors:[] ~delim ~attributes:[] (* Given the locations of the code blocks within [file_contents], then slice it up into [Text] and [Block] parts by using the starts and ends of those blocks as diff --git a/test/bin/mdx-test/expect/attributes/test-case.md b/test/bin/mdx-test/expect/attributes/test-case.md new file mode 100644 index 000000000..9ef060af8 --- /dev/null +++ b/test/bin/mdx-test/expect/attributes/test-case.md @@ -0,0 +1,33 @@ +https://pandoc.org/MANUAL.html#extension-fenced_code_attributes are parsed and left untouched. +Also make sure we actually parse all of these by having the wrong output to trigger a correction. + +```{.sh} +$ echo foo +foo +``` + +``` {.ocaml} +# let x = 3;; +val x : int = 4 +``` + +```{#identifier .ocaml} +# let x = 3;; +val x : int = 4 +``` + +```{#identifier .ocaml attrib="attrval"} +# let x = 3;; +val x : int = 4 +``` + +```{#identifier .ocaml attrib="attrval with spaces"} +# let x = 3;; +val x : int = 4 +``` + +```{#id1 #id2 .ocaml attrib="attrval" attr2="attrval2}"} +# let x = 3;; +val x : int = 4 +``` + diff --git a/test/bin/mdx-test/expect/attributes/test-case.md.expected b/test/bin/mdx-test/expect/attributes/test-case.md.expected new file mode 100644 index 000000000..6f8be0b81 --- /dev/null +++ b/test/bin/mdx-test/expect/attributes/test-case.md.expected @@ -0,0 +1,33 @@ +https://pandoc.org/MANUAL.html#extension-fenced_code_attributes are parsed and left untouched. +Also make sure we actually parse all of these by having the wrong output to trigger a correction. + +```sh +$ echo foo +foo +``` + +```ocaml +# let x = 3;; +val x : int = 3 +``` + +```ocaml {#identifier} +# let x = 3;; +val x : int = 3 +``` + +```ocaml {#identifier attrib="attrval"} +# let x = 3;; +val x : int = 3 +``` + +```ocaml {#identifier attrib="attrval with spaces"} +# let x = 3;; +val x : int = 3 +``` + +```ocaml {#id1 #id2 attrib="attrval" attr2="attrval2}"} +# let x = 3;; +val x : int = 3 +``` + diff --git a/test/bin/mdx-test/expect/dune.inc b/test/bin/mdx-test/expect/dune.inc index f489a3e2e..f646f0f98 100644 --- a/test/bin/mdx-test/expect/dune.inc +++ b/test/bin/mdx-test/expect/dune.inc @@ -1,4 +1,16 @@ +(rule + (target attributes.actual) + (deps (package mdx) (source_tree attributes)) + (action + (with-stdout-to %{target} + (chdir attributes + (run ocaml-mdx test --output - test-case.md))))) + +(rule + (alias runtest) + (action (diff attributes/test-case.md.expected attributes.actual))) + (rule (target bash-fence.actual) (deps (package mdx) (source_tree bash-fence)) diff --git a/test/lib/test_block.ml b/test/lib/test_block.ml index be45ed866..32920c4c0 100644 --- a/test/lib/test_block.ml +++ b/test/lib/test_block.ml @@ -24,6 +24,7 @@ let test_mk = let actual = Mdx.Block.mk ~loc:Location.none ~section:None ~labels ~legacy_labels:false ~header ~contents ~errors:[] ~delim:None + ~attributes:[] in let expected = Result.map_error diff --git a/test/lib/test_dep.ml b/test/lib/test_dep.ml index c8b371abc..c0cdc4a37 100644 --- a/test/lib/test_dep.ml +++ b/test/lib/test_dep.ml @@ -27,6 +27,7 @@ let test_of_block = match Mdx.Block.mk ~loc:Location.none ~section:None ~labels ~header:None ~contents:[] ~legacy_labels:false ~errors:[] ~delim:None + ~attributes:[] with | Ok block -> block | Error _ -> assert false)