Skip to content

Commit

Permalink
infra: implement mdbook plugin to strip markup from headings
Browse files Browse the repository at this point in the history
  • Loading branch information
chriskrycho committed Jan 8, 2025
1 parent 1c26628 commit 678ace3
Show file tree
Hide file tree
Showing 7 changed files with 1,115 additions and 628 deletions.
1,378 changes: 751 additions & 627 deletions nostarch/chapter21.md

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions packages/mdbook-trpl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ path = "src/bin/note.rs"
name = "mdbook-trpl-listing"
path = "src/bin/listing.rs"

[[bin]]
name = "mdbook-trpl-heading"
path = "src/bin/heading.rs"

[[bin]]
name = "mdbook-trpl-figure"
path = "src/bin/figure.rs"
Expand Down
38 changes: 38 additions & 0 deletions packages/mdbook-trpl/src/bin/heading.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use std::io;

use clap::{self, Parser, Subcommand};
use mdbook::preprocess::{CmdPreprocessor, Preprocessor};

use mdbook_trpl::Heading;

/// Entry point for the `mdbook-trpl-heading` preprocessor binary.
///
/// When invoked with the `supports <renderer>` subcommand, succeeds only if
/// `Heading` reports support for that renderer. Otherwise, parses the
/// preprocessor context and book from stdin, runs `Heading` over the book, and
/// serializes the processed book to stdout as JSON.
///
/// Every failure is converted to its `Display` text and returned as the error.
fn main() -> Result<(), String> {
    if let Some(Command::Supports { renderer }) = Cli::parse().command {
        if Heading.supports_renderer(&renderer) {
            return Ok(());
        }
        return Err(format!("Renderer '{renderer}' is unsupported"));
    }

    let (ctx, book) =
        CmdPreprocessor::parse_input(io::stdin()).map_err(|e| e.to_string())?;
    let processed = Heading.run(&ctx, book).map_err(|e| e.to_string())?;

    serde_json::to_writer(io::stdout(), &processed).map_err(|e| e.to_string())
}

/// A simple preprocessor which strips inline markup from headings in _The Rust
/// Programming Language_.
#[derive(Parser, Debug)]
struct Cli {
    // No subcommand means "run the preprocessor": `main` then reads the book
    // from stdin and writes the processed result to stdout.
    #[command(subcommand)]
    command: Option<Command>,
}

#[derive(Subcommand, Debug)]
enum Command {
    /// Is the renderer supported?
    ///
    /// This supports the HTML, Markdown, and test renderers.
    Supports { renderer: String },
}
2 changes: 1 addition & 1 deletion packages/mdbook-trpl/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use mdbook::preprocess::PreprocessorContext;

#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Mode {
Default,
Simple,
Expand Down
114 changes: 114 additions & 0 deletions packages/mdbook-trpl/src/heading/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
use anyhow::anyhow;
use mdbook::{
book::Book,
preprocess::{Preprocessor, PreprocessorContext},
BookItem,
};
use pulldown_cmark::{Event, Tag, TagEnd};
use pulldown_cmark_to_cmark::cmark;

use crate::{CompositeError, Mode};

/// An mdBook preprocessor, registered as `trpl-heading`, which rewrites the
/// headings of each chapter so that they contain no inline markup when the
/// configured mode is anything other than `Mode::Default`.
pub struct TrplHeading;

impl Preprocessor for TrplHeading {
    /// The name of this preprocessor; also passed to `Mode::from_context` when
    /// looking up the mode in `run`.
    fn name(&self) -> &str {
        "trpl-heading"
    }

    /// Rewrites the headings of every chapter in `book`.
    ///
    /// Each chapter's content is replaced by the output of `rewrite_headings`
    /// for the mode obtained from `ctx`. Non-chapter items are left alone.
    ///
    /// # Errors
    ///
    /// Fails if the mode cannot be determined from `ctx`. If rewriting fails
    /// for one or more chapters, the remaining chapters are still processed
    /// and all of the failures are returned together as a `CompositeError`.
    fn run(
        &self,
        ctx: &PreprocessorContext,
        mut book: Book,
    ) -> anyhow::Result<Book> {
        let mode = Mode::from_context(ctx, self.name())?;

        let mut failures = Vec::new();
        book.for_each_mut(|item| {
            let BookItem::Chapter(chapter) = item else {
                return;
            };

            match rewrite_headings(&chapter.content, mode) {
                Ok(rewritten) => chapter.content = rewritten,
                Err(reason) => failures.push(reason),
            }
        });

        if !failures.is_empty() {
            return Err(CompositeError(failures).into());
        }

        Ok(book)
    }

    /// Reports whether `renderer` is one of `html`, `markdown`, or `test`.
    fn supports_renderer(&self, renderer: &str) -> bool {
        matches!(renderer, "html" | "markdown" | "test")
    }
}

/// Rewrites the Markdown in `src` so that headings carry no inline markup.
///
/// In [`Mode::Default`], `src` is returned as-is. In any other mode, the event
/// stream from `crate::parser` is replayed with these changes applied to events
/// that occur inside a heading:
///
/// - the start and end of emphasis, strong emphasis, and strikethrough, as
///   well as inline HTML, are dropped (the text between them is kept);
/// - inline code is replaced by a plain text event with the same body.
///
/// Everything else is passed through untouched, and the resulting events are
/// rendered back to Markdown with `cmark`.
///
/// # Errors
///
/// Returns an error if the event stream ends while still inside a heading, or
/// if `cmark` fails to render the events.
fn rewrite_headings(src: &str, mode: Mode) -> anyhow::Result<String> {
    // The default mode leaves the source exactly as written.
    if mode == Mode::Default {
        return Ok(src.to_owned());
    }

    let mut in_heading = false;
    let mut events = Vec::new();

    for event in crate::parser(src) {
        match event {
            // Headings are assumed to be well-formed: they cannot be nested,
            // so a single flag is enough and the level need not be tracked.
            Event::Start(Tag::Heading { .. }) if !in_heading => {
                in_heading = true;
                events.push(event);
            }
            Event::End(TagEnd::Heading(_)) if in_heading => {
                in_heading = false;
                events.push(event);
            }

            // Inside a heading, drop the delimiters of inline formatting and
            // any inline HTML tags. The text they wrap arrives as separate
            // events and is therefore preserved.
            Event::Start(Tag::Emphasis | Tag::Strong | Tag::Strikethrough)
            | Event::End(
                TagEnd::Emphasis | TagEnd::Strong | TagEnd::Strikethrough,
            )
            | Event::InlineHtml(_)
                if in_heading => {}

            // Inside a heading, emit the body of inline code as plain text
            // (the event does not include the wrapping backticks).
            Event::Code(code) if in_heading => events.push(Event::Text(code)),

            // Anything else, in or out of a heading, passes through as-is.
            other => events.push(other),
        }
    }

    if in_heading {
        return Err(anyhow!("Unclosed heading"));
    }

    let mut rewritten = String::new();
    cmark(events.into_iter(), &mut rewritten)?;
    Ok(rewritten)
}

#[cfg(test)]
mod tests;
205 changes: 205 additions & 0 deletions packages/mdbook-trpl/src/heading/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
use super::*;

// In `Mode::Default`, `rewrite_headings` must return its input exactly as
// given: the emphasis, strong, and code markup stays in place at every heading
// level from one through six.
#[test]
fn default_mode_is_unchanged() {
let result = rewrite_headings(
"# This is *emphasized* and **strong** and `code`
## Here is *another* and **strong** and `code`
### Third *level* **heading** with `code`
#### Fourth *heading* **level** and `code`
##### Fifth *level* **heading** and `code`
###### Last *heading* **level** with `code`
",
Mode::Default,
);

// The expected text is the same literal as the input above.
assert_eq!(
result.unwrap(),
"# This is *emphasized* and **strong** and `code`
## Here is *another* and **strong** and `code`
### Third *level* **heading** with `code`
#### Fourth *heading* **level** and `code`
##### Fifth *level* **heading** and `code`
###### Last *heading* **level** with `code`
"
);
}

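// Note: most of these tests check that the result of rewriting a header *with*
// and *without* the markup is the same, so that other “normalization” that
// happens along the way (inserting or removing newlines, e.g.) is ignored. The
// exceptions are `strips_nested_em` and `strips_nested_strong`, which compare
// the rewritten output against a literal expected string.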
// Tests for `rewrite_headings` in `Mode::Simple`, where inline markup inside
// headings is removed.
mod simple_mode {
use super::*;

// Both `*…*` and `_…_` emphasis are removed at every heading level.
#[test]
fn strips_em() {
let result = rewrite_headings(
"# This is *emphasized* and _this is too_
## Here is *another* and _emphasis style_
### Third *level* _heading_ here
#### Fourth *heading* _level_ text
##### Fifth *level* _heading_ now
###### Last *heading* _level_ test
",
Mode::Simple,
);

// The markup-free source is rewritten too, so both sides of the comparison
// have been through the same rewrite.
let expected = rewrite_headings(
"# This is emphasized and this is too
## Here is another and emphasis style
### Third level heading here
#### Fourth heading level text
##### Fifth level heading now
###### Last heading level test
",
Mode::Simple,
);

assert_eq!(result.unwrap(), expected.unwrap());
}

// Emphasis nested inside emphasis is removed at every depth. Unlike most
// tests in this module, the expected value here is a literal string rather
// than a second rewrite.
#[test]
fn strips_nested_em() {
let result = rewrite_headings(
"# *This _is *extra* emphatic_ emphasis*.",
Mode::Simple,
);
let expected = "# This is extra emphatic emphasis.";

assert_eq!(result.unwrap(), expected);
}

// Both `**…**` and `__…__` strong emphasis are removed at every heading
// level.
#[test]
fn strips_strong() {
let result = rewrite_headings(
"# This is **strong** and __this is too__
## Here is **another** and __strong style__
### Third **level** __heading__ here
#### Fourth **heading** __level__ text
##### Fifth **level** __heading__ now
###### Last **heading** __level__ test
",
Mode::Simple,
);

let expected = rewrite_headings(
"# This is strong and this is too
## Here is another and strong style
### Third level heading here
#### Fourth heading level text
##### Fifth level heading now
###### Last heading level test
",
Mode::Simple,
);

assert_eq!(result.unwrap(), expected.unwrap());
}

// Strong emphasis nested inside strong emphasis is removed at every depth.
// As with `strips_nested_em`, the expected value is a literal string.
#[test]
fn strips_nested_strong() {
let result = rewrite_headings(
"# **This __is **extra** emphatic__ emphasis**.",
Mode::Simple,
);
let expected = "# This is extra emphatic emphasis.";

assert_eq!(result.unwrap(), expected);
}

// Inline code spans are replaced by their plain-text contents.
#[test]
fn strips_code() {
let result = rewrite_headings(
"# This is `code`
## Here is `another`
### Third `level`
#### Fourth `heading`
##### Fifth `level`
###### Last `heading`
",
Mode::Simple,
);

let expected = rewrite_headings(
"# This is code
## Here is another
### Third level
#### Fourth heading
##### Fifth level
###### Last heading
",
Mode::Simple,
);

assert_eq!(result.unwrap(), expected.unwrap());
}

// Inline HTML tags are removed while the text they wrap is kept.
#[test]
fn strips_html() {
let result = rewrite_headings(
"# This is <span>html</span>
## Here is <span>another</span>
### Third <span>level</span>
#### Fourth <span>heading</span>
##### Fifth <span>level</span>
###### Last <span>heading</span>
",
Mode::Simple,
);

let expected = rewrite_headings(
"# This is html
## Here is another
### Third level
#### Fourth heading
##### Fifth level
###### Last heading
",
Mode::Simple,
);

assert_eq!(result.unwrap(), expected.unwrap());
}

// `~~…~~` strikethrough is removed at every heading level.
#[test]
fn strips_strikethrough() {
let result = rewrite_headings(
"# This is ~~strikethrough~~
## Here is ~~another~~
### Third ~~level~~
#### Fourth ~~heading~~
##### Fifth ~~level~~
###### Last ~~heading~~
",
Mode::Simple,
);

let expected = rewrite_headings(
"# This is strikethrough
## Here is another
### Third level
#### Fourth heading
##### Fifth level
###### Last heading
",
Mode::Simple,
);

assert_eq!(result.unwrap(), expected.unwrap());
}

// Different kinds of inline markup nested inside one another (strong,
// strikethrough, emphasis) are all removed together.
#[test]
fn strips_nested_combinations() {
let result = rewrite_headings(
"# **Nested ~~strikethrough _emphasis_ fun~~ times**",
Mode::Simple,
);

let expected = rewrite_headings(
"# Nested strikethrough emphasis fun times",
Mode::Simple,
);

assert_eq!(result.unwrap(), expected.unwrap())
}
}
2 changes: 2 additions & 0 deletions packages/mdbook-trpl/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
mod config;
mod figure;
mod heading;
mod listing;
mod note;

pub use config::Mode;
pub use figure::TrplFigure as Figure;
pub use heading::TrplHeading as Heading;
pub use listing::TrplListing as Listing;
pub use note::TrplNote as Note;
use pulldown_cmark::{Options, Parser};
Expand Down

0 comments on commit 678ace3

Please sign in to comment.