Skip to content

Commit

Permalink
Link checker parses headings inside component (#2230)
Browse files Browse the repository at this point in the history
The link checker was failing to detect inline class methods (related to
#2210) because it did not
peek inside `<Class>` components.

To fix this, we now use our own parsing code for headings. 

It's possible there will be future edge cases this PR does not account
for in its heading parsing. Our link checker will fail in that case and
we can forward fix.
  • Loading branch information
Eric-Arellano authored Nov 4, 2024
1 parent fedadab commit c12e2ac
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 5 deletions.
21 changes: 21 additions & 0 deletions scripts/js/lib/links/extractLinks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,43 @@ test("parseAnchors()", () => {
## \`code-header\`
## Header.with periods-and wild! punctuation?? and numbers 1234 8 (parentheses)
## header_using\_underscores
## UpperCase Should Be lowercase
## repeated
## repeated
## repeated
<Function id="mdx.component.testId" name="testId" signature="testId">
Convert to dictionary.
**Return type**
\`Dict\`
</Function>
<Class>
### Header inside a component
</Class>
`);
expect(result).toEqual(
new Set([
"#my-top-level-heading",
"#header-2",
"#code-header",
"#headerwith-periods-and-wild-punctuation-and-numbers-1234-8-parentheses",
"#header_using_underscores",
"#uppercase-should-be-lowercase",
"#this-is-a-hardcoded-anchor",
"#another_span",
"#mdx.component.testId",
"#header-inside-a-component",
"#repeated",
"#repeated-1",
"#repeated-2",
]),
);
});
Expand Down
27 changes: 22 additions & 5 deletions scripts/js/lib/links/extractLinks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,28 @@ export type ParsedFile = {
};

/**
 * Collects every anchor (`#fragment`) that a markdown/MDX document exposes.
 *
 * The document is scanned line by line for two kinds of anchors:
 * - ATX headings (one to six `#` characters followed by whitespace). Leading
 *   indentation is allowed, so headings nested inside components such as
 *   `<Class>` are found. The title is lowercased, spaces become `-`, and the
 *   characters . , ; ! ? ` \ ( ) are removed. A slug that already exists gets
 *   a numeric suffix: `-1`, `-2`, ...
 * - `id="..."` attributes. Every non-empty id on a line is added verbatim.
 *
 * Known limitation: lines inside fenced code blocks are not treated
 * specially, so a `# comment` line in a code sample also yields an anchor.
 *
 * @param markdown - Raw markdown/MDX source of one file.
 * @returns The set of anchors, each prefixed with `#`.
 */
export function parseAnchors(markdown: string): Set<string> {
  // Patterns are built once, outside the per-line loop.
  const headingPattern = /^\s*#{1,6}\s+(.+?)\s*$/;
  // Global so that several ids on one line are all collected; `[^"]+` keeps
  // an empty `id=""` from swallowing the attribute that follows it.
  const idPattern = /id="([^"]+)"/g;
  const strippedChars = /[.,;!?`\\()]/g;

  const anchors = new Set<string>();
  for (const line of markdown.split("\n")) {
    const title = headingPattern.exec(line)?.[1];
    if (title !== undefined) {
      const slug = title
        .toLowerCase()
        .replace(/ /g, "-")
        .replace(strippedChars, "");
      // Repeated headings get -1, -2, ... appended until the slug is unused.
      let candidate = slug;
      for (let suffix = 1; anchors.has(`#${candidate}`); suffix += 1) {
        candidate = `${slug}-${suffix}`;
      }
      anchors.add(`#${candidate}`);
    }
    for (const attribute of line.matchAll(idPattern)) {
      anchors.add(`#${attribute[1]}`);
    }
  }
  return anchors;
}

export async function parseLinks(
Expand Down

0 comments on commit c12e2ac

Please sign in to comment.