diff --git a/proptest-regressions/lib.txt b/proptest-regressions/lib.txt index 5697447..557f8ed 100644 --- a/proptest-regressions/lib.txt +++ b/proptest-regressions/lib.txt @@ -6,3 +6,4 @@ # everyone who runs the test benefits from these saved cases. cc fb9b5df4fe46fe331cc3aa40bba6501c1c603084688fd02dda6d1c73106c1324 # shrinks to tag_name = "A", parameter = "A", multi_parameter = "\u{b}" cc 28afae9872324ba0632a8023219e32939580363ce8b99752dc19fae0ac5b63d1 # shrinks to paragraph_content = " " +cc 5b31c9987c98fc0e4faa50b782e5952e0948d2c2a60dd29081c8f54c75b4b52c # shrinks to tag_name = "ΓΈ", parameter = "a", multi_parameter = " " diff --git a/src/lib.rs b/src/lib.rs index c588fc2..a7e340d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -527,4 +527,51 @@ mod tests { parse(¶graph_content).unwrap(); } } + + #[test] + fn modifiers() { + let examples: Vec<_> = [ + "this *is* a test", + "hello, *world*!", + "*hello, world!*", + "*hello*, world!", + "*/hello/*, world!", + "*hi!* how are you?", + "this *is a test", + "this *is/ a test", + "this *is*/ a test", + "this */is/*/ a test", + ] + .into_iter() + .map(|example| example.to_string() + "\n") + .map(|str| parse(&str)) + .try_collect() + .unwrap(); + + assert_yaml_snapshot!(examples); + } + + #[test] + fn links() { + let examples: Vec<_> = [ + "{https://github.com/nvim-neorg/neorg}", + "{$ hello!}", + "{/ a-path.txt}", + "{********* hello!}", + "{:/some/file:*** a -path-.txt}", + "[anchor]", + "[anchor][description]", + "{* hello}[description]", + "[description]{* hello}", + "This is a <link>!", + "<*linkable with markup*> here!", + ] + .into_iter() + .map(|example| example.to_string() + "\n") + .map(|str| parse(&str)) + .try_collect() + .unwrap(); + + assert_yaml_snapshot!(examples); + } } diff --git a/src/snapshots/rust_norg__tests__carryover_tags.snap b/src/snapshots/rust_norg__tests__carryover_tags.snap index 5a7ff77..e520930 100644 --- a/src/snapshots/rust_norg__tests__carryover_tags.snap +++ b/src/snapshots/rust_norg__tests__carryover_tags.snap @@ -1,5 +1,5 @@ --- -source: src/main.rs +source: src/lib.rs expression: examples --- - - CarryoverTag: @@ -9,7 +9,8 @@ expression: examples parameters: [] next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Attribute name: @@ -17,7 +18,8 @@ expression: examples parameters: [] next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Attribute name: @@ -26,7 +28,8 @@ expression: examples - parameter next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Attribute name: @@ -35,7 +38,8 @@ expression: examples - one large parameter next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Attribute name: @@ -47,7 +51,8 @@ expression: examples - parameter next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Attribute name: @@ -58,7 +63,8 @@ expression: examples - "https://github.com/super-special/repo.git?text=hello&other_text=bye" next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Macro name: @@ -66,7 +72,8 @@ expression: examples parameters: [] next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Macro name: @@ -74,7 +81,8 @@ expression: examples parameters: [] next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Macro name: @@ -83,7 +91,8 @@ expression: examples - parameter next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Macro name: @@ -92,7 +101,8 @@ expression: examples - one large parameter next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Macro name: @@ -104,7 +114,8 @@ expression: examples - parameter next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph - - CarryoverTag: tag_type: Macro name: @@ -115,4 +126,5 @@ expression: examples - "https://github.com/super-special/repo.git?text=hello&other_text=bye" next_object: Paragraph: - - Text: paragraph + - Token: + Text: paragraph diff --git a/src/snapshots/rust_norg__tests__definitions.snap b/src/snapshots/rust_norg__tests__definitions.snap index 3f76a2d..5975e54 100644 --- a/src/snapshots/rust_norg__tests__definitions.snap +++ b/src/snapshots/rust_norg__tests__definitions.snap @@ -1,5 +1,5 @@ --- -source: src/main.rs +source: src/lib.rs expression: examples --- - - RangeableDetachedModifier: @@ -9,7 +9,8 @@ expression: examples extensions: [] content: - Paragraph: - - Text: Definition + - Token: + Text: Definition - - RangeableDetachedModifier: modifier_type: Definition title: @@ -17,6 +18,8 @@ expression: examples extensions: [] content: - Paragraph: - - Text: Long - - Whitespace - - Text: definition + - Token: + Text: Long + - Token: Whitespace + - Token: + Text: definition diff --git a/src/snapshots/rust_norg__tests__footnotes.snap b/src/snapshots/rust_norg__tests__footnotes.snap index 4e0b30d..797d1fb 100644 --- a/src/snapshots/rust_norg__tests__footnotes.snap +++ b/src/snapshots/rust_norg__tests__footnotes.snap @@ -1,5 +1,5 @@ --- -source: src/main.rs +source: src/lib.rs expression: examples --- - - RangeableDetachedModifier: @@ -9,7 +9,8 @@ expression: examples extensions: [] content: - Paragraph: - - Text: Content + - Token: + Text: Content - - RangeableDetachedModifier: modifier_type: Footnote title: @@ -17,6 +18,8 @@ expression: examples extensions: [] content: - Paragraph: - - Text: Long - - Whitespace - - Text: content + - Token: + Text: Long + - Token: Whitespace + - Token: + Text: content diff --git a/src/snapshots/rust_norg__tests__headings.snap b/src/snapshots/rust_norg__tests__headings.snap index 83818a4..52f2a00 100644 --- a/src/snapshots/rust_norg__tests__headings.snap +++ b/src/snapshots/rust_norg__tests__headings.snap @@ -1,5 +1,5 @@ --- -source: src/main.rs +source: src/lib.rs expression: examples --- - - Heading: @@ -18,8 +18,10 @@ expression: examples - Text: Heading extensions: [] - Paragraph: - - Text: content - - Special: "." + - Token: + Text: content + - Token: + Special: "." - - Heading: level: 7 title: @@ -64,24 +66,32 @@ expression: examples - Text: Heading extensions: [] - Paragraph: - - Text: sneaky - - Whitespace - - Text: content - - Special: "." + - Token: + Text: sneaky + - Token: Whitespace + - Token: + Text: content + - Token: + Special: "." - Heading: level: 2 title: - Text: Subheading extensions: [] - Paragraph: - - Text: more - - Whitespace - - Text: sneaky - - Whitespace - - Text: content - - Whitespace - - Text: inside - - Special: "." + - Token: + Text: more + - Token: Whitespace + - Token: + Text: sneaky + - Token: Whitespace + - Token: + Text: content + - Token: Whitespace + - Token: + Text: inside + - Token: + Special: "." - Heading: level: 1 title: diff --git a/src/snapshots/rust_norg__tests__links.snap b/src/snapshots/rust_norg__tests__links.snap new file mode 100644 index 0000000..31e44f1 --- /dev/null +++ b/src/snapshots/rust_norg__tests__links.snap @@ -0,0 +1,132 @@ +--- +source: src/lib.rs +expression: examples +--- +- - Paragraph: + - Link: + filepath: ~ + targets: + - Url: "https://github.com/nvim-neorg/neorg" + description: ~ +- - Paragraph: + - Link: + filepath: ~ + targets: + - Definition: + - Token: + Text: hello + - Token: + Special: "!" + description: ~ +- - Paragraph: + - Link: + filepath: ~ + targets: + - Path: a-path.txt + description: ~ +- - Paragraph: + - Link: + filepath: ~ + targets: + - Heading: + level: 9 + title: + - Token: + Text: hello + - Token: + Special: "!" + description: ~ +- - Paragraph: + - Link: + filepath: /some/file + targets: + - Heading: + level: 3 + title: + - Token: + Text: a + - Token: Whitespace + - AttachedModifier: + modifier_type: "-" + content: + - Token: + Text: path + - Token: + Special: "." + - Token: + Text: txt + description: ~ +- - Paragraph: + - Anchor: + content: + - Token: + Text: anchor + description: ~ +- - Paragraph: + - Anchor: + content: + - Token: + Text: anchor + description: + - Token: + Text: description +- - Paragraph: + - Link: + filepath: ~ + targets: + - Heading: + level: 1 + title: + - Token: + Text: hello + description: + - Token: + Text: description +- - Paragraph: + - AnchorDefinition: + content: + - Token: + Text: description + target: + Link: + filepath: ~ + targets: + - Heading: + level: 1 + title: + - Token: + Text: hello + description: ~ +- - Paragraph: + - Token: + Text: This + - Token: Whitespace + - Token: + Text: is + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - InlineLinkTarget: + - Token: + Text: link + - Token: + Special: "!" +- - Paragraph: + - InlineLinkTarget: + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: linkable + - Token: Whitespace + - Token: + Text: with + - Token: Whitespace + - Token: + Text: markup + - Token: Whitespace + - Token: + Text: here + - Token: + Special: "!" diff --git a/src/snapshots/rust_norg__tests__lists.snap b/src/snapshots/rust_norg__tests__lists.snap index a7affb1..e28d832 100644 --- a/src/snapshots/rust_norg__tests__lists.snap +++ b/src/snapshots/rust_norg__tests__lists.snap @@ -1,5 +1,5 @@ --- -source: src/main.rs +source: src/lib.rs expression: examples --- - - NestableDetachedModifier: @@ -8,86 +8,113 @@ expression: examples extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - - NestableDetachedModifier: modifier_type: UnorderedList level: 4 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - - NestableDetachedModifier: modifier_type: UnorderedList level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: UnorderedList level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: UnorderedList level: 2 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: UnorderedList level: 2 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: UnorderedList level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: UnorderedList level: 3 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - - Paragraph: - - Special: "-" - - Special: "-" - - Special: "-" - - Text: not - - Whitespace - - Text: list + - Token: + Special: "-" + - Token: + Special: "-" + - Token: + Special: "-" + - Token: + Text: not + - Token: Whitespace + - Token: + Text: list - - Paragraph: - - Special: "-" - - Special: "-" - - Special: ">" - - Whitespace - - Text: not - - Whitespace - - Text: a - - Whitespace - - Text: list + - Token: + Special: "-" + - Token: + Special: "-" + - Token: + Special: ">" + - Token: Whitespace + - Token: + Text: not + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - Token: + Text: list diff --git a/src/snapshots/rust_norg__tests__modifiers.snap b/src/snapshots/rust_norg__tests__modifiers.snap new file mode 100644 index 0000000..5535451 --- /dev/null +++ b/src/snapshots/rust_norg__tests__modifiers.snap @@ -0,0 +1,158 @@ +--- +source: src/lib.rs +expression: examples +--- +- - Paragraph: + - Token: + Text: this + - Token: Whitespace + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: is + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - Token: + Text: test +- - Paragraph: + - Token: + Text: hello + - Token: + Special: "," + - Token: Whitespace + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: world + - Token: + Special: "!" +- - Paragraph: + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: hello + - Token: + Special: "," + - Token: Whitespace + - Token: + Text: world + - Token: + Special: "!" +- - Paragraph: + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: hello + - Token: + Special: "," + - Token: Whitespace + - Token: + Text: world + - Token: + Special: "!" +- - Paragraph: + - AttachedModifier: + modifier_type: "*" + content: + - AttachedModifier: + modifier_type: / + content: + - Token: + Text: hello + - Token: + Special: "," + - Token: Whitespace + - Token: + Text: world + - Token: + Special: "!" +- - Paragraph: + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: hi + - Token: + Special: "!" + - Token: Whitespace + - Token: + Text: how + - Token: Whitespace + - Token: + Text: are + - Token: Whitespace + - Token: + Text: you? +- - Paragraph: + - Token: + Text: this + - Token: Whitespace + - Token: + Special: "*" + - Token: + Text: is + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - Token: + Text: test +- - Paragraph: + - Token: + Text: this + - Token: Whitespace + - Token: + Special: "*" + - Token: + Text: is + - Token: + Special: / + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - Token: + Text: test +- - Paragraph: + - Token: + Text: this + - Token: Whitespace + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: is + - Token: + Special: / + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - Token: + Text: test +- - Paragraph: + - Token: + Text: this + - Token: Whitespace + - AttachedModifier: + modifier_type: "*" + content: + - AttachedModifier: + modifier_type: / + content: + - Token: + Text: is + - Token: + Special: / + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - Token: + Text: test diff --git a/src/snapshots/rust_norg__tests__ordered_lists.snap b/src/snapshots/rust_norg__tests__ordered_lists.snap index 413882a..abe3b0a 100644 --- a/src/snapshots/rust_norg__tests__ordered_lists.snap +++ b/src/snapshots/rust_norg__tests__ordered_lists.snap @@ -1,5 +1,5 @@ --- -source: src/main.rs +source: src/lib.rs expression: examples --- - - NestableDetachedModifier: @@ -8,86 +8,113 @@ expression: examples extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - - NestableDetachedModifier: modifier_type: OrderedList level: 4 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - - NestableDetachedModifier: modifier_type: OrderedList level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: OrderedList level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: OrderedList level: 2 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: OrderedList level: 2 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: OrderedList level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - NestableDetachedModifier: modifier_type: OrderedList level: 3 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: list + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: list - - Paragraph: - - Special: "~" - - Special: "~" - - Special: "~" - - Text: not - - Whitespace - - Text: list + - Token: + Special: "~" + - Token: + Special: "~" + - Token: + Special: "~" + - Token: + Text: not + - Token: Whitespace + - Token: + Text: list - - Paragraph: - - Special: "~" - - Special: "~" - - Special: ">" - - Whitespace - - Text: not - - Whitespace - - Text: a - - Whitespace - - Text: list + - Token: + Special: "~" + - Token: + Special: "~" + - Token: + Special: ">" + - Token: Whitespace + - Token: + Text: not + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - Token: + Text: list diff --git a/src/snapshots/rust_norg__tests__paragraphs.snap b/src/snapshots/rust_norg__tests__paragraphs.snap index 7e0e2e7..92f778e 100644 --- a/src/snapshots/rust_norg__tests__paragraphs.snap +++ b/src/snapshots/rust_norg__tests__paragraphs.snap @@ -3,36 +3,61 @@ source: src/lib.rs expression: examples --- - - Paragraph: - - Text: hello - - Special: "," - - Whitespace - - Text: world - - Special: "!" + - Token: + Text: hello + - Token: + Special: "," + - Token: Whitespace + - Token: + Text: world + - Token: + Special: "!" - - Paragraph: - - Special: "*" - - Text: hello - - Special: "," - - Whitespace - - Text: world - - Special: "!" - - Special: "*" + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: hello + - Token: + Special: "," + - Token: Whitespace + - Token: + Text: world + - Token: + Special: "!" - - Paragraph: - - Special: "*" - - Text: hello - - Special: "," - - Text: world - - Special: "!" - - Special: "*" + - AttachedModifier: + modifier_type: "*" + content: + - Token: + Text: hello + - Token: + Special: "," + - Token: Whitespace + - Token: + Text: world + - Token: + Special: "!" - - Paragraph: - - Text: two + - Token: + Text: two - Paragraph: - - Text: paragraphs + - Token: + Text: paragraphs - - Paragraph: - - Text: paragraph - - Text: here + - Token: + Text: paragraph + - Token: Whitespace + - Token: + Text: here - Paragraph: - - Text: another - - Whitespace - - Text: paragraph - - Text: here - - Special: "." + - Token: + Text: another + - Token: Whitespace + - Token: + Text: paragraph + - Token: Whitespace + - Token: + Text: here + - Token: + Special: "." diff --git a/src/snapshots/rust_norg__tests__quotes.snap b/src/snapshots/rust_norg__tests__quotes.snap index 9549d89..bc7c72d 100644 --- a/src/snapshots/rust_norg__tests__quotes.snap +++ b/src/snapshots/rust_norg__tests__quotes.snap @@ -1,5 +1,5 @@ --- -source: src/main.rs +source: src/lib.rs expression: examples --- - - NestableDetachedModifier: @@ -8,86 +8,113 @@ expression: examples extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: quote + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: quote - - NestableDetachedModifier: modifier_type: Quote level: 4 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: quote + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: quote - - NestableDetachedModifier: modifier_type: Quote level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: quote + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: quote - NestableDetachedModifier: modifier_type: Quote level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: quote + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: quote - NestableDetachedModifier: modifier_type: Quote level: 2 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: quote + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: quote - NestableDetachedModifier: modifier_type: Quote level: 2 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: quote + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: quote - NestableDetachedModifier: modifier_type: Quote level: 1 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: quote + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: quote - NestableDetachedModifier: modifier_type: Quote level: 3 extensions: [] content: Paragraph: - - Text: Test - - Whitespace - - Text: quote + - Token: + Text: Test + - Token: Whitespace + - Token: + Text: quote - - Paragraph: - - Special: ">" - - Special: ">" - - Special: ">" - - Text: not - - Whitespace - - Text: quote + - Token: + Special: ">" + - Token: + Special: ">" + - Token: + Special: ">" + - Token: + Text: not + - Token: Whitespace + - Token: + Text: quote - - Paragraph: - - Special: ">" - - Special: ">" - - Special: "-" - - Whitespace - - Text: not - - Whitespace - - Text: a - - Whitespace - - Text: quote + - Token: + Special: ">" + - Token: + Special: ">" + - Token: + Special: "-" + - Token: Whitespace + - Token: + Text: not + - Token: Whitespace + - Token: + Text: a + - Token: Whitespace + - Token: + Text: quote diff --git a/src/snapshots/rust_norg__tests__ranged_tags.snap b/src/snapshots/rust_norg__tests__ranged_tags.snap index ab6410b..97f989b 100644 --- a/src/snapshots/rust_norg__tests__ranged_tags.snap +++ b/src/snapshots/rust_norg__tests__ranged_tags.snap @@ -8,10 +8,13 @@ expression: examples parameters: [] content: - Paragraph: - - Text: Hello - - Whitespace - - Text: world - - Special: "!" + - Token: + Text: Hello + - Token: Whitespace + - Token: + Text: world + - Token: + Special: "!" - - RangedTag: name: - example @@ -28,9 +31,11 @@ expression: examples parameters: [] next_object: Paragraph: - - Text: text - - Whitespace - - Text: within + - Token: + Text: text + - Token: Whitespace + - Token: + Text: within - - RangedTag: name: - some-complex_tag_ @@ -40,15 +45,20 @@ expression: examples - third-parameter content: - Paragraph: - - Text: this - - Whitespace - - Text: is - - Whitespace - - Text: some - - Whitespace - - Text: text - - Whitespace - - Text: within + - Token: + Text: this + - Token: Whitespace + - Token: + Text: is + - Token: Whitespace + - Token: + Text: some + - Token: Whitespace + - Token: + Text: text + - Token: Whitespace + - Token: + Text: within - - RangedTag: name: - example @@ -86,10 +96,13 @@ expression: examples parameters: [] content: - Paragraph: - - Text: Hello - - Whitespace - - Text: world - - Special: "!" + - Token: + Text: Hello + - Token: Whitespace + - Token: + Text: world + - Token: + Special: "!" - - RangedTag: name: - example @@ -106,9 +119,11 @@ expression: examples parameters: [] next_object: Paragraph: - - Text: text - - Whitespace - - Text: within + - Token: + Text: text + - Token: Whitespace + - Token: + Text: within - - RangedTag: name: - some-complex_tag_ @@ -118,15 +133,20 @@ expression: examples - third-parameter content: - Paragraph: - - Text: this - - Whitespace - - Text: is - - Whitespace - - Text: some - - Whitespace - - Text: text - - Whitespace - - Text: within + - Token: + Text: this + - Token: Whitespace + - Token: + Text: is + - Token: Whitespace + - Token: + Text: some + - Token: Whitespace + - Token: + Text: text + - Token: Whitespace + - Token: + Text: within - - RangedTag: name: - example diff --git a/src/snapshots/rust_norg__tests__tables.snap b/src/snapshots/rust_norg__tests__tables.snap index 1ae9b58..d9cd6a2 100644 --- a/src/snapshots/rust_norg__tests__tables.snap +++ b/src/snapshots/rust_norg__tests__tables.snap @@ -1,5 +1,5 @@ --- -source: src/main.rs +source: src/lib.rs expression: examples --- - - RangeableDetachedModifier: @@ -9,9 +9,11 @@ expression: examples extensions: [] content: - Paragraph: - - Text: Cell - - Whitespace - - Text: content + - Token: + Text: Cell + - Token: Whitespace + - Token: + Text: content - - RangeableDetachedModifier: modifier_type: Table title: @@ -19,9 +21,13 @@ expression: examples extensions: [] content: - Paragraph: - - Text: Long - - Whitespace - - Text: cell - - Whitespace - - Text: content - - Special: "." + - Token: + Text: Long + - Token: Whitespace + - Token: + Text: cell + - Token: Whitespace + - Token: + Text: content + - Token: + Special: "." diff --git a/src/stage_2.rs b/src/stage_2.rs index 12b1bff..4bf6527 100644 --- a/src/stage_2.rs +++ b/src/stage_2.rs @@ -36,14 +36,16 @@ impl From<ParagraphSegmentToken> for String { } } -pub type ParagraphSegment = Vec<ParagraphSegmentToken>; +pub type ParagraphTokenList = Vec<ParagraphSegmentToken>; -fn tokens_to_paragraph_segment(tokens: Vec<NorgToken>) -> ParagraphSegment { +fn tokens_to_paragraph_segment(tokens: Vec<NorgToken>) -> ParagraphTokenList { tokens .into_iter() .peekable() .batching(|it| match it.next() { - Some(NorgToken::Whitespace(_)) => Some(ParagraphSegmentToken::Whitespace), + Some(NorgToken::SingleNewline) | Some(NorgToken::Whitespace(_)) => { + Some(ParagraphSegmentToken::Whitespace) + } Some(NorgToken::Special(c)) => Some(ParagraphSegmentToken::Special(c)), Some(NorgToken::Escape(c)) => Some(ParagraphSegmentToken::Escape(c)), Some(NorgToken::Regular(c)) => { @@ -66,54 +68,54 @@ fn tokens_to_paragraph_segment(tokens: Vec<NorgToken>) -> ParagraphSegment { #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] pub enum NorgBlock { /// A segment of a paragraph consisting of Norg tokens. - ParagraphSegment(ParagraphSegment), + ParagraphSegment(ParagraphTokenList), /// End of a paragraph segment. - ParagraphSegmentEnd(ParagraphSegment), + ParagraphSegmentEnd(ParagraphTokenList), /// A heading with a specified level, title, and optional extension section. Heading { level: u16, - title: ParagraphSegment, - extension_section: ParagraphSegment, + title: ParagraphTokenList, + extension_section: ParagraphTokenList, }, /// A nestable detached modifier with a type, level, and optional extension section. NestableDetachedModifier { modifier_type: char, level: u16, - extension_section: ParagraphSegment, + extension_section: ParagraphTokenList, }, /// A rangeable detached modifier with an indication if it is ranged, type, title, and optional extension section. RangeableDetachedModifier { ranged: bool, modifier_type: char, - title: ParagraphSegment, - extension_section: ParagraphSegment, + title: ParagraphTokenList, + extension_section: ParagraphTokenList, }, /// Closing tag for a rangeable detached modifier. RangeableDetachedModifierClose(char), /// A ranged tag with a type, name, and optional parameters. RangedTag { tag_type: char, - name: ParagraphSegment, - parameters: Option<Vec<ParagraphSegment>>, + name: ParagraphTokenList, + parameters: Option<Vec<ParagraphTokenList>>, }, /// End of a ranged tag. RangedTagEnd(char), /// A verbatim ranged tag with a name, optional parameters, and content. VerbatimRangedTag { - name: ParagraphSegment, - parameters: Option<Vec<ParagraphSegment>>, + name: ParagraphTokenList, + parameters: Option<Vec<ParagraphTokenList>>, content: Vec<NorgToken>, }, /// An infirm tag with a name and optional parameters. InfirmTag { - name: ParagraphSegment, - parameters: Option<Vec<ParagraphSegment>>, + name: ParagraphTokenList, + parameters: Option<Vec<ParagraphTokenList>>, }, /// A carryover tag with a type, name, and optional parameters. CarryoverTag { tag_type: char, - name: ParagraphSegment, - parameters: Option<Vec<ParagraphSegment>>, + name: ParagraphTokenList, + parameters: Option<Vec<ParagraphTokenList>>, }, } @@ -401,9 +403,9 @@ pub fn stage_2() -> impl Parser<NorgToken, Vec<NorgBlock>, Error = chumsky::erro NorgToken::Newlines(_) => { NorgBlock::ParagraphSegmentEnd(tokens_to_paragraph_segment(content)) } - NorgToken::SingleNewline => { - NorgBlock::ParagraphSegment(tokens_to_paragraph_segment(content)) - } + NorgToken::SingleNewline => NorgBlock::ParagraphSegment( + tokens_to_paragraph_segment(content.into_iter().chain(trailing).collect()), + ), _ => unreachable!(), }) .labelled("paragraph_segment"), diff --git a/src/stage_3.rs b/src/stage_3.rs index dc4ab9e..b2e552d 100644 --- a/src/stage_3.rs +++ b/src/stage_3.rs @@ -2,7 +2,7 @@ use chumsky::prelude::*; use itertools::Itertools; use serde::Serialize; -use crate::stage_2::{NorgBlock, ParagraphSegment, ParagraphSegmentToken}; +use crate::stage_2::{NorgBlock, ParagraphSegmentToken, ParagraphTokenList}; #[derive(Debug, PartialEq, Serialize)] pub enum NestableDetachedModifier { @@ -81,9 +81,396 @@ pub enum RangedTag { Standard, } +fn paragraph_parser_opener_candidates_and_links() -> impl Parser< + ParagraphSegmentToken, + Vec<ParagraphSegment>, + Error = chumsky::error::Simple<ParagraphSegmentToken>, +> { + let token = any().map(ParagraphSegment::Token); + let modifier = select! { + ParagraphSegmentToken::Special(c @ ('*' | '/' | '_' | '-')) => c, + }; + + let whitespace_or_special = select! { + w @ ParagraphSegmentToken::Whitespace => w, + s @ ParagraphSegmentToken::Special(_) => s, + }; + + let opening_modifier_candidate = whitespace_or_special + .then(modifier.repeated().at_least(1)) + .then(just(ParagraphSegmentToken::Whitespace).not()) + .map(|((left, modifiers), right)| { + ParagraphSegment::AttachedModifierOpener((Some(left), modifiers, right)) + }); + + let left_empty_opening_modifier = modifier + .repeated() + .at_least(1) + .then(just(ParagraphSegmentToken::Whitespace).not()) + .map(|(modifiers, right)| { + ParagraphSegment::AttachedModifierOpener((None, modifiers, right)) + }); + + let anchor = just(ParagraphSegmentToken::Special('[')) + .ignore_then( + just(ParagraphSegmentToken::Special(']')) + .not() + .repeated() + .at_least(1), + ) + .then_ignore(just(ParagraphSegmentToken::Special(']'))); + + let link = just(ParagraphSegmentToken::Special('{')) + .ignore_then( + just(ParagraphSegmentToken::Special(':')) + .ignore_then( + just(ParagraphSegmentToken::Special(':')) + .not() + .repeated() + .at_least(1), + ) + .then_ignore(just(ParagraphSegmentToken::Special(':'))) + .or_not(), + ) + .then( + choice(( + just(ParagraphSegmentToken::Special('*')) + .repeated() + .at_least(1) + .map(|tokens| "*".repeat(tokens.len())), + just(ParagraphSegmentToken::Special('$')).to("$".to_string()), + just(ParagraphSegmentToken::Special('^')).to("^".to_string()), + just(ParagraphSegmentToken::Special('/')).to("/".to_string()), + just(ParagraphSegmentToken::Special('=')).to("=".to_string()), + just(ParagraphSegmentToken::Special('?')).to("?".to_string()), + just(ParagraphSegmentToken::Special('@')).to("@".to_string()), + )) + .then_ignore( + just(ParagraphSegmentToken::Whitespace) + .repeated() + .at_least(1), + ) + .or_not(), + ) + .then( + just(ParagraphSegmentToken::Special('}')) + .not() + .repeated() + .at_least(1), + ) + .then_ignore(just(ParagraphSegmentToken::Special('}'))) + .then(anchor.clone().or_not()) + .map( + |(((filepath, modifiers), content), description)| ParagraphSegment::Link { + filepath: filepath + .map(|content| content.into_iter().map_into::<String>().collect()), + description: description.map(|content| parse_paragraph(content).unwrap()), + targets: vec![if let Some(modifiers) = modifiers { + match modifiers.as_str() { + "$" => LinkTarget::Definition(parse_paragraph(content).unwrap()), + "^" => LinkTarget::Footnote(parse_paragraph(content).unwrap()), + "?" => LinkTarget::Wiki(parse_paragraph(content).unwrap()), + "=" => LinkTarget::Extendable(parse_paragraph(content).unwrap()), + "/" => LinkTarget::Path(content.into_iter().map_into::<String>().collect()), + "@" => LinkTarget::Timestamp( + content.into_iter().map_into::<String>().collect(), + ), + + // Only other possibility is a heading. + str => LinkTarget::Heading { + level: str.len() as u16, + title: parse_paragraph(content).unwrap(), + }, + } + } else { + LinkTarget::Url(content.into_iter().map_into::<String>().collect()) + }], + }, + ); + + let inline_linkable = just(ParagraphSegmentToken::Special('<')) + .ignore_then( + just(ParagraphSegmentToken::Special('>')) + .not() + .repeated() + .at_least(1), + ) + .then_ignore(just(ParagraphSegmentToken::Special('>'))) + .map(|content| ParagraphSegment::InlineLinkTarget(parse_paragraph(content).unwrap())); + + left_empty_opening_modifier.or_not().chain( + choice(( + link.clone(), + anchor + .clone() + .then(link) + .map(|(content, link)| ParagraphSegment::AnchorDefinition { + content: parse_paragraph(content).unwrap(), + target: Box::new(link), + }), + anchor + .clone() + .then(anchor.clone().or_not()) + .map(|(content, description)| ParagraphSegment::Anchor { + content: parse_paragraph(content).unwrap(), + description: description.map(|content| parse_paragraph(content).unwrap()), + }), + inline_linkable, + opening_modifier_candidate, + token, + )) + .repeated() + .at_least(1), + ) +} + +fn dedup_opener_candidates(input: Vec<ParagraphSegment>) -> Vec<ParagraphSegment> { + use ParagraphSegment::*; + + input + .into_iter() + .coalesce(|prev, next| match (prev.clone(), next.clone()) { + (AttachedModifierOpener(_), AttachedModifierOpener(data)) => { + Err((prev, AttachedModifierOpenerFail(data))) + } + _ => Err((prev, next)), + }) + .collect() +} + +fn paragraph_parser_closer_candidates( +) -> impl Parser<ParagraphSegment, Vec<ParagraphSegment>, Error = chumsky::error::Simple<ParagraphSegment>> +{ + use ParagraphSegment::*; + + let token = any(); + let modifier = select! { + Token(ParagraphSegmentToken::Special(c @ ('*' | '/' | '_' | '-'))) => c, + }; + + let whitespace_or_special = select! { + w @ Token(ParagraphSegmentToken::Whitespace) => w, + s @ Token(ParagraphSegmentToken::Special(_)) => s, + }; + + let closing_modifier_candidate = just(Token(ParagraphSegmentToken::Whitespace)) + .not() + .then(modifier.repeated().at_least(1)) + .then(whitespace_or_special) + .map(|((left, modifiers), right)| { + ParagraphSegment::AttachedModifierCloserCandidate(( + Box::new(left), + modifiers, + Some(Box::new(right)), + )) + }); + + // TODO(vhyrro): This is not optimal, as it causes a second parse of a potentially long string + // of nodes. Ideally, the `end()` check should be done directly in a single parse. + let closing_modifier_candidate_with_eof = just(Token(ParagraphSegmentToken::Whitespace)) + .not() + .then(modifier.repeated().at_least(1)) + .then_ignore(end()) + .map(|(left, modifiers)| { + ParagraphSegment::AttachedModifierCloserCandidate((Box::new(left), modifiers, None)) + }); + + choice(( + closing_modifier_candidate, + closing_modifier_candidate_with_eof, + token, + )) + .repeated() + .at_least(1) +} + +fn unravel_candidates(input: Vec<ParagraphSegment>) -> Vec<ParagraphSegment> { + use ParagraphSegment::*; + + input + .into_iter() + .fold(Vec::new(), |mut acc: Vec<ParagraphSegment>, segment| { + match segment { + t @ Token(_) => acc.push(t), + AttachedModifierOpener((left, modifiers, right)) => { + if let Some(left) = left { + acc.push(Token(left)); + } + acc.extend(modifiers.into_iter().map(|modifier_type| { + AttachedModifierCandidate { + modifier_type, + content: Vec::default(), + closer: None, + } + })); + acc.push(Token(right)); + } + AttachedModifierCloserCandidate((left, modifiers, right)) => { + acc.push(*left); + acc.extend(modifiers.into_iter().map(AttachedModifierCloser)); + if let Some(right) = right { + acc.push(*right); + } + } + AttachedModifierCloser(c) => acc.push(Token(ParagraphSegmentToken::Special(c))), + AttachedModifierOpenerFail((left, modifiers, right)) => { + if let Some(left) = left { + acc.push(Token(left)); + } + acc.extend( + modifiers + .into_iter() + .map(|c| Token(ParagraphSegmentToken::Special(c))), + ); + acc.push(Token(right)); + } + others => acc.push(others), + }; + + acc + }) +} + +fn paragraph_rollup_candidates( +) -> impl Parser<ParagraphSegment, Vec<ParagraphSegment>, Error = chumsky::error::Simple<ParagraphSegment>> +{ + let candidate = select! { ParagraphSegment::AttachedModifierCloser(c) => c, }; + + let attached_modifier = recursive(|attached_modifier| { + select! { + ParagraphSegment::AttachedModifierCandidate { modifier_type, .. } => modifier_type, + } + .then(attached_modifier.or(candidate.not()).repeated().at_least(1)) + .then(candidate) + .try_map(|((modifier_type, content), closer), span| { + if modifier_type == closer { + Ok(ParagraphSegment::AttachedModifier { + modifier_type, + content, + }) + } else { + Err(Simple::custom( + span, + "differing opening and closing modifiers found", + )) + } + }) + }); + + choice((attached_modifier, any())).repeated().at_least(1) +} + +fn eliminate_invalid_candidates(input: Vec<ParagraphSegment>) -> Vec<ParagraphSegment> { + input + .into_iter() + .fold(Vec::new(), |mut acc: Vec<ParagraphSegment>, segment| { + match segment { + ParagraphSegment::AttachedModifierCandidate { + modifier_type, + content, + closer, + } => { + acc.push(ParagraphSegment::Token(ParagraphSegmentToken::Special( + modifier_type, + ))); + acc.extend(content); + + if let Some(closer) = closer { + acc.push(*closer); + } + } + _ => acc.push(segment), + }; + + acc + }) +} + +#[derive(Clone, Hash, Debug, PartialEq, Eq, Serialize)] +pub enum LinkTarget { + Heading { + level: u16, + title: Vec<ParagraphSegment>, + }, + Footnote(Vec<ParagraphSegment>), + Definition(Vec<ParagraphSegment>), + Generic(Vec<ParagraphSegment>), + Wiki(Vec<ParagraphSegment>), + Extendable(Vec<ParagraphSegment>), + Path(String), + Url(String), + Timestamp(String), +} + +#[derive(Debug, Clone, PartialEq, Serialize, Hash, Eq)] +pub enum ParagraphSegment { + Token(ParagraphSegmentToken), + AttachedModifierOpener( + ( + Option<ParagraphSegmentToken>, + Vec<char>, + ParagraphSegmentToken, + ), + ), + AttachedModifierOpenerFail( + ( + Option<ParagraphSegmentToken>, + Vec<char>, + ParagraphSegmentToken, + ), + ), + AttachedModifierCloserCandidate( + ( + Box<ParagraphSegment>, + Vec<char>, + Option<Box<ParagraphSegment>>, + ), + ), + AttachedModifierCloser(char), + AttachedModifierCandidate { + modifier_type: char, + content: Vec<Self>, + closer: Option<Box<Self>>, + }, + AttachedModifier { + modifier_type: char, + content: Vec<Self>, + }, + Link { + filepath: Option<String>, + targets: Vec<LinkTarget>, + description: Option<Vec<ParagraphSegment>>, + }, + AnchorDefinition { + content: Vec<ParagraphSegment>, + target: Box<Self>, + }, + Anchor { + content: Vec<ParagraphSegment>, + description: Option<Vec<ParagraphSegment>>, + }, + InlineLinkTarget(Vec<ParagraphSegment>), +} + +fn parse_paragraph( + input: Vec<ParagraphSegmentToken>, +) -> Result<Vec<ParagraphSegment>, Vec<chumsky::error::Simple<ParagraphSegmentToken>>> { + Ok(eliminate_invalid_candidates(unravel_candidates( + paragraph_rollup_candidates() + .parse(unravel_candidates( + paragraph_parser_closer_candidates() + .parse(unravel_candidates(dedup_opener_candidates( + paragraph_parser_opener_candidates_and_links().parse(input)?, + ))) + .unwrap(), + )) + .unwrap(), + ))) +} + #[derive(Debug, PartialEq, Serialize)] pub enum NorgASTFlat { - Paragraph(ParagraphSegment), + Paragraph(Vec<ParagraphSegment>), NestableDetachedModifier { modifier_type: NestableDetachedModifier, level: u16, @@ -92,13 +479,13 @@ pub enum NorgASTFlat { }, RangeableDetachedModifier { modifier_type: RangeableDetachedModifier, - title: ParagraphSegment, + title: ParagraphTokenList, extensions: Vec<DetachedModifierExtension>, content: Vec<Self>, }, Heading { level: u16, - title: ParagraphSegment, + title: ParagraphTokenList, extensions: Vec<DetachedModifierExtension>, }, CarryoverTag { @@ -205,7 +592,15 @@ pub fn stage_3( .chain(paragraph_segment_end.or_not()), paragraph_segment_end, )) - .map(NorgASTFlat::Paragraph); + .map(|mut tokens| { + // Trim trailing whitespace (both user-induced but also induced by us when + // converting single newlines to whitespace). + if let Some(ParagraphSegmentToken::Whitespace) = tokens.last() { + tokens.pop(); + } + + NorgASTFlat::Paragraph(parse_paragraph(tokens).unwrap()) + }); let nestable_detached_modifier = select! { NorgBlock::NestableDetachedModifier { modifier_type: '-', level, extension_section } => (NestableDetachedModifier::UnorderedList, level, extension_section), @@ -257,7 +652,7 @@ pub fn stage_3( extensions: detached_modifier_extensions().parse(extension_section).unwrap_or_default(), })); - let stringify_tokens_and_split = move |tokens: ParagraphSegment| -> Vec<String> { + let stringify_tokens_and_split = move |tokens: ParagraphTokenList| -> Vec<String> { tokens.into_iter().map_into::<String>().collect::<String>().split('.').map_into().collect() };