Skip to content

Commit

Permalink
πŸ‘©β€πŸ’» Improve HTML processing (#680)
Browse files Browse the repository at this point in the history
Fixes #418
  • Loading branch information
rowanc1 authored Oct 17, 2023
1 parent 5737951 commit 8bd4ee2
Show file tree
Hide file tree
Showing 6 changed files with 351 additions and 10 deletions.
6 changes: 6 additions & 0 deletions .changeset/rotten-timers-worry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'myst-transforms': patch
'myst-cli': patch
---

Improve HTML transforms for grouping and processing
4 changes: 3 additions & 1 deletion packages/myst-cli/src/process/mdast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
joinGatesPlugin,
glossaryPlugin,
abbreviationPlugin,
reconstructHtmlPlugin,
} from 'myst-transforms';
import { unified } from 'unified';
import { VFile } from 'vfile';
Expand Down Expand Up @@ -164,9 +165,10 @@ export async function transformMdast(
liftCodeMetadataToBlock(session, vfile, mdast);

const pipe = unified()
.use(reconstructHtmlPlugin) // We need to group and link the HTML first
.use(htmlPlugin, { htmlHandlers }) // Some of the HTML plugins need to operate on the transformed html, e.g. figure caption transforms
.use(basicTransformationsPlugin)
.use(inlineExpressionsPlugin) // Happens before math and images!
.use(htmlPlugin, { htmlHandlers })
.use(mathPlugin, { macros: frontmatter.math })
.use(glossaryPlugin, { state }) // This should be before the enumerate plugins
.use(abbreviationPlugin, { abbreviations: frontmatter.abbreviations })
Expand Down
259 changes: 258 additions & 1 deletion packages/myst-transforms/src/html.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, expect, test } from 'vitest';
import { reconstructHtmlTransform } from './html';
import { htmlTransform, reconstructHtmlTransform } from './html';

describe('Test reconstructHtmlTransform', () => {
test('tree without html returns self', async () => {
Expand Down Expand Up @@ -162,4 +162,261 @@ describe('Test reconstructHtmlTransform', () => {
children: [{ type: 'html', value: '<script>alert("error")</script>' }],
});
});
// An anchor whose content is made only of void elements (`<img>`, `<hr>`, `<br>`),
// each arriving as its own `html` node, should be merged into one `html` node.
test('self-closing tags', async () => {
  // Small builder for a raw `html` node.
  const html = (value: string) => ({ type: 'html', value });

  const tree = {
    type: 'root',
    children: [
      html('<a href="https://mystmd.org">'),
      html('<img src="https://mystmd.org/logo.png" />'),
      html('<hr>'),
      html('<br>'),
      html('</a>'),
    ],
  };

  reconstructHtmlTransform(tree);

  // Expected serialization: the `/>` on the image is dropped and the void
  // elements are separated by newlines inside the single anchor.
  const merged =
    '<a href="https://mystmd.org"><img src="https://mystmd.org/logo.png">\n<hr>\n<br></a>';
  expect(tree).toEqual({
    type: 'root',
    children: [html(merged)],
  });
});
// A `<figure>` split across several `html` nodes (with a plain text caption in
// between) is first regrouped into one `html` node, then converted by
// `htmlTransform` into a container holding an image and a caption.
test('figure captions', async () => {
  // Small builder for a raw `html` node.
  const html = (value: string) => ({ type: 'html', value });

  const tree = {
    type: 'root',
    children: [
      html('<figure>'),
      html('<img src="img.png" class="big" id="my-img">'),
      html('<figcaption>'),
      { type: 'text', value: 'my caption' },
      html('</figcaption>'),
      html('</figure>'),
    ],
  };

  // Step 1: grouping. Everything between <figure> and </figure> becomes one node.
  reconstructHtmlTransform(tree);
  const groupedFigure =
    '<figure><img src="img.png" class="big" id="my-img">\n<figcaption>my caption</figcaption></figure>';
  expect(tree).toEqual({
    type: 'root',
    children: [html(groupedFigure)],
  });

  // Step 2: conversion. The grouped HTML is replaced by MyST nodes.
  htmlTransform(tree);
  const expectedImage = {
    type: 'image',
    url: 'img.png',
    class: 'big',
    identifier: 'my-img',
    label: 'my-img',
  };
  const expectedCaption = {
    type: 'caption',
    children: [{ type: 'text', value: 'my caption' }],
  };
  expect(tree).toEqual({
    type: 'root',
    children: [{ type: 'container', children: [expectedImage, expectedCaption] }],
  });
});
// Inline `<a>` HTML that already lives inside a paragraph: the tags are grouped
// into one `html` node and then turned into a `link` node in place, with the
// surrounding text siblings untouched and no extra paragraph introduced.
test('no paragraph when in a paragraph', async () => {
  // Small builders for the two leaf node kinds used below.
  const text = (value: string) => ({ type: 'text', value });
  const html = (value: string) => ({ type: 'html', value });

  const tree = {
    type: 'root',
    children: [
      {
        type: 'paragraph',
        children: [text('See '), html('<a href="link.html">'), text('here'), html('</a>'), text('.')],
      },
    ],
  };

  // Step 1: the opening tag, its text content and the closing tag collapse
  // into a single html node between the untouched text siblings.
  reconstructHtmlTransform(tree);
  expect(tree).toEqual({
    type: 'root',
    children: [
      {
        type: 'paragraph',
        children: [text('See '), html('<a href="link.html">here</a>'), text('.')],
      },
    ],
  });

  // Step 2: the html node is replaced by a link node at the same position.
  htmlTransform(tree);
  const expectedLink = {
    type: 'link',
    url: 'link.html',
    children: [text('here')],
  };
  expect(tree).toEqual({
    type: 'root',
    children: [
      {
        type: 'paragraph',
        children: [text('See '), expectedLink, text('.')],
      },
    ],
  });
});
// Inline `<sup>` HTML that already lives inside a paragraph: the tags are
// grouped into one `html` node and then turned into a `superscript` node in
// place, without introducing an extra paragraph.
// The title carries a "(superscript)" suffix so it does not collide with the
// link-based test of the same behaviour; duplicate titles are ambiguous in
// test reports and when filtering by test name.
test('no paragraph when in a paragraph (superscript)', async () => {
  const mdast = {
    type: 'root',
    children: [
      {
        type: 'paragraph',
        children: [
          { type: 'text', value: 'See ' },
          { type: 'html', value: '<sup>' },
          { type: 'text', value: '[1]' },
          { type: 'html', value: '</sup>' },
          { type: 'text', value: '.' },
        ],
      },
    ],
  };

  // Step 1: the opening tag, its text content and the closing tag collapse
  // into a single html node between the untouched text siblings.
  reconstructHtmlTransform(mdast);
  expect(mdast).toEqual({
    type: 'root',
    children: [
      {
        type: 'paragraph',
        children: [
          { type: 'text', value: 'See ' },
          { type: 'html', value: '<sup>[1]</sup>' },
          { type: 'text', value: '.' },
        ],
      },
    ],
  });

  // Step 2: the html node is replaced by a superscript node at the same position.
  htmlTransform(mdast);
  expect(mdast).toEqual({
    type: 'root',
    children: [
      {
        type: 'paragraph',
        children: [
          { type: 'text', value: 'See ' },
          {
            type: 'superscript',
            children: [{ type: 'text', value: '[1]' }],
          },
          { type: 'text', value: '.' },
        ],
      },
    ],
  });
});
});
Loading

0 comments on commit 8bd4ee2

Please sign in to comment.