From 14a6f5c205bf0347154285c14f80a0c8e089bad6 Mon Sep 17 00:00:00 2001 From: ctcpip Date: Fri, 13 Oct 2023 13:07:21 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20fix=20extra=20whitespace=20in=20lis?= =?UTF-8?q?ts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/bad-linebreaks-test.mjs | 8 +-- scripts/bad-linebreaks.mjs | 49 +++++++++++++++---- .../bad-linebreaks-sample-after.md | 15 ++++++ .../bad-linebreaks-sample-before.md | 15 ++++++ 4 files changed, 73 insertions(+), 14 deletions(-) diff --git a/scripts/bad-linebreaks-test.mjs b/scripts/bad-linebreaks-test.mjs index 6bf1d4c4..516923a1 100644 --- a/scripts/bad-linebreaks-test.mjs +++ b/scripts/bad-linebreaks-test.mjs @@ -7,8 +7,8 @@ const afterMD = './scripts/test-samples/bad-linebreaks-sample-after.md'; const beforeMD = './scripts/test-samples/bad-linebreaks-sample-before.md'; // verify hash values to detect file tampering -const knownAfterHash = 'c2b5b7cc30cf5d4ce28274848eeba743'; -const knownBeforeHash = 'c9cf57714ec19de2aeea68d45536b119'; +const knownAfterHash = '5f29c1fb4abd747c2dd801e12c600ee3'; +const knownBeforeHash = '406e900af5cd9af66abbe5b3ab6bcf3e'; const afterHash = await getHashSlingingSlasher(afterMD); const beforeHash = await getHashSlingingSlasher(beforeMD); assert.strictEqual(afterHash, knownAfterHash); @@ -18,7 +18,7 @@ let fixed, totalMatches; ({ fixed, totalMatches } = findBadStuff(beforeMD, true)); assert.strictEqual(totalMatches.badLinebreaks, 12); -assert.strictEqual(totalMatches.extraWhitespace, 28); +assert.strictEqual(totalMatches.extraWhitespace, 114); assert.strictEqual(fixed, fs.readFileSync(afterMD, 'utf8').toString()); ({ fixed, totalMatches } = findBadStuff(afterMD, true)); @@ -28,7 +28,7 @@ assert.strictEqual(fixed, fs.readFileSync(afterMD, 'utf8').toString()); ({ fixed, totalMatches } = findBadStuff(beforeMD)); assert.strictEqual(totalMatches.badLinebreaks, 12); -assert.strictEqual(totalMatches.extraWhitespace, 28); +assert.strictEqual(totalMatches.extraWhitespace, 114); function getHashSlingingSlasher(file) { // 💀 return new Promise((res, rej) => { diff --git a/scripts/bad-linebreaks.mjs b/scripts/bad-linebreaks.mjs index 9f3a43ad..db43f5c5 100644 --- a/scripts/bad-linebreaks.mjs +++ b/scripts/bad-linebreaks.mjs @@ -7,8 +7,16 @@ import { glob } from 'glob'; // import attributes when? const mdlintConfig = JSON.parse(fs.readFileSync('.markdownlint-cli2.jsonc', 'utf8').toString()); +// not exhaustive, just the types we care about +const tokenTypeEnum = Object.freeze({ + LIST: 'list', + PARAGRAPH: 'paragraph', + SPACE: 'space', +}); + const reBadLinebreaks = /(?<=[\w\d ])\n(?=[\w\d ])/g; -const reExtraWhitespace = /^ +| (?= )| +$/gm; +const reExtraWhitespaceParagraph = /^ +| (?= )| +$/gm; +const reExtraWhitespaceList = /(?<=^ {0,}[-*+] |\d+\. ) +|(?<=\w+ ) +| +$/gm; export function findBadStuff(file, fix = false) { @@ -24,9 +32,16 @@ export function findBadStuff(file, fix = false) { const t = tokens[i]; let tokenContent = t.raw; - if (t.type === 'paragraph') { - tokenContent = findBadLinebreaks(tokenContent, totalMatches, fix, file); - tokenContent = findExtraWhitespace(tokenContent, totalMatches, fix, file); + switch (t.type) { + case tokenTypeEnum.PARAGRAPH: + tokenContent = findBadLinebreaks(tokenContent, totalMatches, fix, file, t.type); + // falls through + case tokenTypeEnum.LIST: + case tokenTypeEnum.SPACE: + tokenContent = findExtraWhitespace(tokenContent, totalMatches, fix, file, t.type); + break; + default: + // do nothing } // we don't need to build this array if `fix` is `false`, but this keeps complexity down @@ -41,7 +56,7 @@ export function findBadStuff(file, fix = false) { } -function findBadLinebreaks(tokenContent, totalMatches, fix, file) { +function findBadLinebreaks(tokenContent, totalMatches, fix, file, tokenType) { const matches = Array.from(tokenContent.matchAll(reBadLinebreaks)); totalMatches.badLinebreaks += matches.length; @@ -61,27 +76,41 @@ function findBadLinebreaks(tokenContent, totalMatches, fix, file) { } } else if (matches.length > 0) { - console.error(`${file}\nfound paragraph with ${matches.length} erroneous linebreak(s):\n${tokenContent}\n`); + console.error(`${file}\nfound ${tokenType} with ${matches.length} erroneous linebreak(s):\n${tokenContent}\n`); } return tokenContent; } -function findExtraWhitespace(tokenContent, totalMatches, fix, file) { +function findExtraWhitespace(tokenContent, totalMatches, fix, file, tokenType) { + + let re; + + switch (tokenType) { + case tokenTypeEnum.PARAGRAPH: + case tokenTypeEnum.SPACE: + re = reExtraWhitespaceParagraph; + break; + case tokenTypeEnum.LIST: + re = reExtraWhitespaceList; + break; + default: + throw new TypeError(`unsupported token type: ${tokenType}`); + } - const matches = Array.from(tokenContent.matchAll(reExtraWhitespace)); + const matches = Array.from(tokenContent.matchAll(re)); const extraWhitespaceCharacters = matches.join('').length; totalMatches.extraWhitespace += extraWhitespaceCharacters; if (fix) { if (matches.length > 0) { - return tokenContent.replace(reExtraWhitespace, ''); + return tokenContent.replace(re, ''); } } else if (matches.length > 0) { - console.error(`${file}\nfound paragraph with ${extraWhitespaceCharacters} extra whitespace character(s):\n${tokenContent}\n`); + console.error(`${file}\nfound ${tokenType} with ${extraWhitespaceCharacters} extra whitespace character(s):\n${tokenContent}\n`); } return tokenContent; diff --git a/scripts/test-samples/bad-linebreaks-sample-after.md b/scripts/test-samples/bad-linebreaks-sample-after.md index ef824a26..daf2183d 100644 --- a/scripts/test-samples/bad-linebreaks-sample-after.md +++ b/scripts/test-samples/bad-linebreaks-sample-after.md @@ -38,6 +38,21 @@ let biscuits = "delicious"; let biscuits = "delicious"; ``` +100. First list item + - First nested list item + - Second nested list item + +1. biscuits are +1. extremely delicious +1. indeed + +- lists +* are ++ fun + - and + * one more + + time + ## story time! True! nervous, very, very dreadfully nervous I had been and am; but why will you say that I am mad? The disease had sharpened my senses, not destroyed, not dulled them. Above all was the sense of hearing acute. I heard all things in the heaven and in the earth. I heard many things in hell. How then am I mad? Hearken! and observe how healthily, how calmly I can tell you the whole story. diff --git a/scripts/test-samples/bad-linebreaks-sample-before.md b/scripts/test-samples/bad-linebreaks-sample-before.md index e3043594..fc2cda9f 100644 --- a/scripts/test-samples/bad-linebreaks-sample-before.md +++ b/scripts/test-samples/bad-linebreaks-sample-before.md @@ -44,6 +44,21 @@ let biscuits = "delicious"; let biscuits = "delicious"; ``` +100. First list item + - First nested list item + - Second nested list item + +1. biscuits are +1. extremely delicious +1. indeed + +- lists +* are ++ fun + - and + * one more + + time + ## story time! True! nervous, very, very dreadfully nervous I had been and am; but why will you say that I am mad? The disease had sharpened my senses, not destroyed,