-
Notifications
You must be signed in to change notification settings - Fork 3.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
infra: implement mdbook plugin to strip markup from headings
- Loading branch information
1 parent
1c26628
commit 678ace3
Showing
7 changed files
with
1,115 additions
and
628 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
use std::io; | ||
|
||
use clap::{self, Parser, Subcommand}; | ||
use mdbook::preprocess::{CmdPreprocessor, Preprocessor}; | ||
|
||
use mdbook_trpl::Heading; | ||
|
||
fn main() -> Result<(), String> { | ||
let cli = Cli::parse(); | ||
if let Some(Command::Supports { renderer }) = cli.command { | ||
return if Heading.supports_renderer(&renderer) { | ||
Ok(()) | ||
} else { | ||
Err(format!("Renderer '{renderer}' is unsupported")) | ||
}; | ||
} | ||
|
||
let (ctx, book) = CmdPreprocessor::parse_input(io::stdin()) | ||
.map_err(|e| format!("{e}"))?; | ||
let processed = Heading.run(&ctx, book).map_err(|e| format!("{e}"))?; | ||
serde_json::to_writer(io::stdout(), &processed).map_err(|e| format!("{e}")) | ||
} | ||
|
||
/// A simple preprocessor for semantic markup for code listings in _The Rust | ||
/// Programming Language_. | ||
#[derive(Parser, Debug)] | ||
struct Cli { | ||
#[command(subcommand)] | ||
command: Option<Command>, | ||
} | ||
|
||
#[derive(Subcommand, Debug)] | ||
enum Command { | ||
/// Is the renderer supported? | ||
/// | ||
/// This supports the HTML | ||
Supports { renderer: String }, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
use anyhow::anyhow; | ||
use mdbook::{ | ||
book::Book, | ||
preprocess::{Preprocessor, PreprocessorContext}, | ||
BookItem, | ||
}; | ||
use pulldown_cmark::{Event, Tag, TagEnd}; | ||
use pulldown_cmark_to_cmark::cmark; | ||
|
||
use crate::{CompositeError, Mode}; | ||
|
||
pub struct TrplHeading; | ||
|
||
impl Preprocessor for TrplHeading { | ||
fn name(&self) -> &str { | ||
"trpl-heading" | ||
} | ||
|
||
fn run( | ||
&self, | ||
ctx: &PreprocessorContext, | ||
mut book: Book, | ||
) -> anyhow::Result<Book> { | ||
let mode = Mode::from_context(ctx, self.name())?; | ||
|
||
let mut errors = vec![]; | ||
book.for_each_mut(|item| { | ||
if let BookItem::Chapter(ref mut chapter) = item { | ||
match rewrite_headings(&chapter.content, mode) { | ||
Ok(rewritten) => chapter.content = rewritten, | ||
Err(reason) => errors.push(reason), | ||
} | ||
} | ||
}); | ||
|
||
if errors.is_empty() { | ||
Ok(book) | ||
} else { | ||
Err(CompositeError(errors).into()) | ||
} | ||
} | ||
|
||
fn supports_renderer(&self, renderer: &str) -> bool { | ||
renderer == "html" || renderer == "markdown" || renderer == "test" | ||
} | ||
} | ||
|
||
fn rewrite_headings(src: &str, mode: Mode) -> anyhow::Result<String> { | ||
// Don't rewrite anything for the default mode. | ||
if mode == Mode::Default { | ||
return Ok(src.into()); | ||
} | ||
|
||
#[derive(Default)] | ||
struct State<'e> { | ||
in_heading: bool, | ||
events: Vec<Event<'e>>, | ||
} | ||
|
||
let final_state: State = crate::parser(src).try_fold( | ||
State::default(), | ||
|mut state, event| -> anyhow::Result<State> { | ||
if state.in_heading { | ||
match event { | ||
// When we see the start or end of any of the inline tags | ||
// (emphasis, strong emphasis, or strikethrough), or any | ||
// inline HTML tags, we just skip emitting them. As dumb as | ||
// that may seem, it does the job! | ||
Event::Start( | ||
Tag::Emphasis | Tag::Strong | Tag::Strikethrough, | ||
) | ||
| Event::End( | ||
TagEnd::Emphasis | ||
| TagEnd::Strong | ||
| TagEnd::Strikethrough, | ||
) | ||
| Event::InlineHtml(_) => { /* skip */ } | ||
|
||
// For code, we just emit the body of the inline code block, | ||
// unchanged (the wrapping backticks are not present here). | ||
Event::Code(code) => { | ||
state.events.push(Event::Text(code)); | ||
} | ||
|
||
// Assume headings are well-formed; you cannot have a nested | ||
// headings, so we don't have to check heading level. | ||
Event::End(TagEnd::Heading(_)) => { | ||
state.in_heading = false; | ||
state.events.push(event); | ||
} | ||
_ => state.events.push(event), | ||
} | ||
} else if matches!(event, Event::Start(Tag::Heading { .. })) { | ||
state.events.push(event); | ||
state.in_heading = true; | ||
} else { | ||
state.events.push(event); | ||
} | ||
|
||
Ok(state) | ||
}, | ||
)?; | ||
|
||
if final_state.in_heading { | ||
return Err(anyhow!("Unclosed heading")); | ||
} | ||
|
||
let mut rewritten = String::new(); | ||
cmark(final_state.events.into_iter(), &mut rewritten)?; | ||
Ok(rewritten) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
use super::*; | ||
|
||
/// In the default mode, headings full of inline markup must come back
/// exactly as they went in.
#[test]
fn default_mode_is_unchanged() {
    let source = "# This is *emphasized* and **strong** and `code`
## Here is *another* and **strong** and `code`
### Third *level* **heading** with `code`
#### Fourth *heading* **level** and `code`
##### Fifth *level* **heading** and `code`
###### Last *heading* **level** with `code`
";

    let rewritten = rewrite_headings(source, Mode::Default);

    assert_eq!(rewritten.unwrap(), source);
}
|
||
// Note: these tests all check that the result of rewriting a header *with* and | ||
// *without* the markup is the same, so that other “normalization” that happens | ||
// along the way (inserting or removing newlines, e.g.) is ignored. | ||
mod simple_mode { | ||
use super::*; | ||
|
||
#[test] | ||
fn strips_em() { | ||
let result = rewrite_headings( | ||
"# This is *emphasized* and _this is too_ | ||
## Here is *another* and _emphasis style_ | ||
### Third *level* _heading_ here | ||
#### Fourth *heading* _level_ text | ||
##### Fifth *level* _heading_ now | ||
###### Last *heading* _level_ test | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
let expected = rewrite_headings( | ||
"# This is emphasized and this is too | ||
## Here is another and emphasis style | ||
### Third level heading here | ||
#### Fourth heading level text | ||
##### Fifth level heading now | ||
###### Last heading level test | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
assert_eq!(result.unwrap(), expected.unwrap()); | ||
} | ||
|
||
#[test] | ||
fn strips_nested_em() { | ||
let result = rewrite_headings( | ||
"# *This _is *extra* emphatic_ emphasis*.", | ||
Mode::Simple, | ||
); | ||
let expected = "# This is extra emphatic emphasis."; | ||
|
||
assert_eq!(result.unwrap(), expected); | ||
} | ||
|
||
#[test] | ||
fn strips_strong() { | ||
let result = rewrite_headings( | ||
"# This is **strong** and __this is too__ | ||
## Here is **another** and __strong style__ | ||
### Third **level** __heading__ here | ||
#### Fourth **heading** __level__ text | ||
##### Fifth **level** __heading__ now | ||
###### Last **heading** __level__ test | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
let expected = rewrite_headings( | ||
"# This is strong and this is too | ||
## Here is another and strong style | ||
### Third level heading here | ||
#### Fourth heading level text | ||
##### Fifth level heading now | ||
###### Last heading level test | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
assert_eq!(result.unwrap(), expected.unwrap()); | ||
} | ||
|
||
#[test] | ||
fn strips_nested_strong() { | ||
let result = rewrite_headings( | ||
"# **This __is **extra** emphatic__ emphasis**.", | ||
Mode::Simple, | ||
); | ||
let expected = "# This is extra emphatic emphasis."; | ||
|
||
assert_eq!(result.unwrap(), expected); | ||
} | ||
|
||
#[test] | ||
fn strips_code() { | ||
let result = rewrite_headings( | ||
"# This is `code` | ||
## Here is `another` | ||
### Third `level` | ||
#### Fourth `heading` | ||
##### Fifth `level` | ||
###### Last `heading` | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
let expected = rewrite_headings( | ||
"# This is code | ||
## Here is another | ||
### Third level | ||
#### Fourth heading | ||
##### Fifth level | ||
###### Last heading | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
assert_eq!(result.unwrap(), expected.unwrap()); | ||
} | ||
|
||
#[test] | ||
fn strips_html() { | ||
let result = rewrite_headings( | ||
"# This is <span>html</span> | ||
## Here is <span>another</span> | ||
### Third <span>level</span> | ||
#### Fourth <span>heading</span> | ||
##### Fifth <span>level</span> | ||
###### Last <span>heading</span> | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
let expected = rewrite_headings( | ||
"# This is html | ||
## Here is another | ||
### Third level | ||
#### Fourth heading | ||
##### Fifth level | ||
###### Last heading | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
assert_eq!(result.unwrap(), expected.unwrap()); | ||
} | ||
|
||
#[test] | ||
fn strips_strikethrough() { | ||
let result = rewrite_headings( | ||
"# This is ~~strikethrough~~ | ||
## Here is ~~another~~ | ||
### Third ~~level~~ | ||
#### Fourth ~~heading~~ | ||
##### Fifth ~~level~~ | ||
###### Last ~~heading~~ | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
let expected = rewrite_headings( | ||
"# This is strikethrough | ||
## Here is another | ||
### Third level | ||
#### Fourth heading | ||
##### Fifth level | ||
###### Last heading | ||
", | ||
Mode::Simple, | ||
); | ||
|
||
assert_eq!(result.unwrap(), expected.unwrap()); | ||
} | ||
|
||
#[test] | ||
fn strips_nested_combinations() { | ||
let result = rewrite_headings( | ||
"# **Nested ~~strikethrough _emphasis_ fun~~ times**", | ||
Mode::Simple, | ||
); | ||
|
||
let expected = rewrite_headings( | ||
"# Nested strikethrough emphasis fun times", | ||
Mode::Simple, | ||
); | ||
|
||
assert_eq!(result.unwrap(), expected.unwrap()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters