From 14a6f5c205bf0347154285c14f80a0c8e089bad6 Mon Sep 17 00:00:00 2001
From: ctcpip <ctcpip@users.noreply.github.com>
Date: Fri, 13 Oct 2023 13:07:21 -0500
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20fix=20extra=20whitespace=20in=20lis?=
 =?UTF-8?q?ts?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scripts/bad-linebreaks-test.mjs               |  8 +--
 scripts/bad-linebreaks.mjs                    | 49 +++++++++++++++----
 .../bad-linebreaks-sample-after.md            | 15 ++++++
 .../bad-linebreaks-sample-before.md           | 15 ++++++
 4 files changed, 73 insertions(+), 14 deletions(-)

diff --git a/scripts/bad-linebreaks-test.mjs b/scripts/bad-linebreaks-test.mjs
index 6bf1d4c4..516923a1 100644
--- a/scripts/bad-linebreaks-test.mjs
+++ b/scripts/bad-linebreaks-test.mjs
@@ -7,8 +7,8 @@ const afterMD = './scripts/test-samples/bad-linebreaks-sample-after.md';
 const beforeMD = './scripts/test-samples/bad-linebreaks-sample-before.md';
 
 // verify hash values to detect file tampering
-const knownAfterHash = 'c2b5b7cc30cf5d4ce28274848eeba743';
-const knownBeforeHash = 'c9cf57714ec19de2aeea68d45536b119';
+const knownAfterHash = '5f29c1fb4abd747c2dd801e12c600ee3';
+const knownBeforeHash = '406e900af5cd9af66abbe5b3ab6bcf3e';
 const afterHash = await getHashSlingingSlasher(afterMD);
 const beforeHash = await getHashSlingingSlasher(beforeMD);
 assert.strictEqual(afterHash, knownAfterHash);
@@ -18,7 +18,7 @@ let fixed, totalMatches;
 
 ({ fixed, totalMatches } = findBadStuff(beforeMD, true));
 assert.strictEqual(totalMatches.badLinebreaks, 12);
-assert.strictEqual(totalMatches.extraWhitespace, 28);
+assert.strictEqual(totalMatches.extraWhitespace, 114);
 assert.strictEqual(fixed, fs.readFileSync(afterMD, 'utf8').toString());
 
 ({ fixed, totalMatches } = findBadStuff(afterMD, true));
@@ -28,7 +28,7 @@ assert.strictEqual(fixed, fs.readFileSync(afterMD, 'utf8').toString());
 
 ({ fixed, totalMatches } = findBadStuff(beforeMD));
 assert.strictEqual(totalMatches.badLinebreaks, 12);
-assert.strictEqual(totalMatches.extraWhitespace, 28);
+assert.strictEqual(totalMatches.extraWhitespace, 114);
 
 function getHashSlingingSlasher(file) {  // 💀
   return new Promise((res, rej) => {
diff --git a/scripts/bad-linebreaks.mjs b/scripts/bad-linebreaks.mjs
index 9f3a43ad..db43f5c5 100644
--- a/scripts/bad-linebreaks.mjs
+++ b/scripts/bad-linebreaks.mjs
@@ -7,8 +7,16 @@ import { glob } from 'glob';
 // import attributes when?
 const mdlintConfig = JSON.parse(fs.readFileSync('.markdownlint-cli2.jsonc', 'utf8').toString());
 
+// `type` values of the tokens this script acts on; not an exhaustive list of token types
+const tokenTypeEnum = Object.freeze({
+  LIST: 'list',
+  PARAGRAPH: 'paragraph',
+  SPACE: 'space',
+});
+
 const reBadLinebreaks = /(?<=[\w\d ])\n(?=[\w\d ])/g;
-const reExtraWhitespace = /^ +| (?= )| +$/gm;
+const reExtraWhitespaceParagraph = /^ +| (?= )| +$/gm;  // spaces at line start, any space followed by another space, spaces at line end
+const reExtraWhitespaceList = /(?<=^ {0,}[-*+] |\d+\. ) +|(?<=\w+ ) +| +$/gm;  // spaces beyond the single one after a `-`/`*`/`+` or `N.` marker or after a word, plus spaces at line end
 
 export function findBadStuff(file, fix = false) {
 
@@ -24,9 +32,16 @@ export function findBadStuff(file, fix = false) {
     const t = tokens[i];
     let tokenContent = t.raw;
 
-    if (t.type === 'paragraph') {
-      tokenContent = findBadLinebreaks(tokenContent, totalMatches, fix, file);
-      tokenContent = findExtraWhitespace(tokenContent, totalMatches, fix, file);
+    switch (t.type) {
+      case tokenTypeEnum.PARAGRAPH:
+        tokenContent = findBadLinebreaks(tokenContent, totalMatches, fix, file, t.type);
+        // falls through
+      case tokenTypeEnum.LIST:
+      case tokenTypeEnum.SPACE:
+        tokenContent = findExtraWhitespace(tokenContent, totalMatches, fix, file, t.type);
+        break;
+      default:
+        // do nothing
     }
 
     // we don't need to build this array if `fix` is `false`, but this keeps complexity down
@@ -41,7 +56,7 @@ export function findBadStuff(file, fix = false) {
 
 }
 
-function findBadLinebreaks(tokenContent, totalMatches, fix, file) {
+function findBadLinebreaks(tokenContent, totalMatches, fix, file, tokenType) {
 
   const matches = Array.from(tokenContent.matchAll(reBadLinebreaks));
   totalMatches.badLinebreaks += matches.length;
@@ -61,27 +76,41 @@ function findBadLinebreaks(tokenContent, totalMatches, fix, file) {
     }
 
   } else if (matches.length > 0) {
-    console.error(`${file}\nfound paragraph with ${matches.length} erroneous linebreak(s):\n${tokenContent}\n`);
+    console.error(`${file}\nfound ${tokenType} with ${matches.length} erroneous linebreak(s):\n${tokenContent}\n`);
   }
 
   return tokenContent;
 
 }
 
-function findExtraWhitespace(tokenContent, totalMatches, fix, file) {
+function findExtraWhitespace(tokenContent, totalMatches, fix, file, tokenType) {
+
+  let re;
+
+  switch (tokenType) {
+    case tokenTypeEnum.PARAGRAPH:
+    case tokenTypeEnum.SPACE:
+      re = reExtraWhitespaceParagraph;
+      break;
+    case tokenTypeEnum.LIST:
+      re = reExtraWhitespaceList;
+      break;
+    default:
+      throw new TypeError(`unsupported token type: ${tokenType}`);
+  }
 
-  const matches = Array.from(tokenContent.matchAll(reExtraWhitespace));
+  const matches = Array.from(tokenContent.matchAll(re));
   const extraWhitespaceCharacters = matches.join('').length;
   totalMatches.extraWhitespace += extraWhitespaceCharacters;
 
   if (fix) {
 
     if (matches.length > 0) {
-      return tokenContent.replace(reExtraWhitespace, '');
+      return tokenContent.replace(re, '');
     }
 
   } else if (matches.length > 0) {
-    console.error(`${file}\nfound paragraph with ${extraWhitespaceCharacters} extra whitespace character(s):\n${tokenContent}\n`);
+    console.error(`${file}\nfound ${tokenType} with ${extraWhitespaceCharacters} extra whitespace character(s):\n${tokenContent}\n`);
   }
 
   return tokenContent;
diff --git a/scripts/test-samples/bad-linebreaks-sample-after.md b/scripts/test-samples/bad-linebreaks-sample-after.md
index ef824a26..daf2183d 100644
--- a/scripts/test-samples/bad-linebreaks-sample-after.md
+++ b/scripts/test-samples/bad-linebreaks-sample-after.md
@@ -38,6 +38,21 @@ let biscuits = "delicious";
 let biscuits = "delicious";
 ```
 
+100. First list item
+       - First nested list item
+         - Second nested list item
+
+1. biscuits are
+1. extremely delicious
+1. indeed
+
+- lists
+* are
++ fun
+  - and
+  * one more
+  + time
+
 ## story time!
 
 True! nervous, very, very dreadfully nervous I had been and am; but why will you say that I am mad? The disease had sharpened my senses, not destroyed, not dulled them. Above all was the sense of hearing acute. I heard all things in the heaven and in the earth. I heard many things in hell. How then am I mad? Hearken! and observe how healthily, how calmly I can tell you the whole story.
diff --git a/scripts/test-samples/bad-linebreaks-sample-before.md b/scripts/test-samples/bad-linebreaks-sample-before.md
index e3043594..fc2cda9f 100644
--- a/scripts/test-samples/bad-linebreaks-sample-before.md
+++ b/scripts/test-samples/bad-linebreaks-sample-before.md
@@ -44,6 +44,21 @@ let biscuits = "delicious";
 let biscuits = "delicious";
 ```
 
+100.     First     list    item 
+       -    First  nested  list  item 
+         -    Second     nested   list   item    
+
+1.  biscuits    are    
+1. extremely        delicious   
+1.   indeed 
+
+-       lists
+* are  
++  fun 
+  -  and 
+  *     one   more       
+  +     time  
+
 ## story time!
 
 True! nervous, very, very dreadfully nervous I had been and am; but why will you say that I am mad? The disease had sharpened my senses,  not destroyed,