From f416134541d20afb93fc30c677c1966e6ade7054 Mon Sep 17 00:00:00 2001
From: Gaurav <gaurav-nelson@users.noreply.github.com>
Date: Sun, 12 May 2024 10:21:08 +1000
Subject: [PATCH 1/3] Added asciidoc extraction capabilities, fixed asciidoc
 hyperlinks check

---
 index.js                           |   4 +-
 lib/extract-asciidoc-links.js      | 450 +++++++++++++++++++++++++++++
 lib/extract-markdown-hyperlinks.js |  33 +--
 lib/handle-links-modification.js   |  42 +++
 linkspector.js                     |   4 +-
 test/fixtures/asciidoc/test1.adoc  |   3 +
 test/fixtures/asciidoc/test2.adoc  |   1 +
 7 files changed, 505 insertions(+), 32 deletions(-)
 create mode 100644 lib/extract-asciidoc-links.js
 create mode 100644 lib/handle-links-modification.js
 create mode 100644 test/fixtures/asciidoc/test1.adoc
 create mode 100644 test/fixtures/asciidoc/test2.adoc

diff --git a/index.js b/index.js
index eb33047..a5feb4d 100755
--- a/index.js
+++ b/index.js
@@ -46,7 +46,7 @@ program
           if (linkStatusObj.status === "error") {
             if (cmd.json) {
               results.diagnostics.push({
-                message: linkStatusObj.error_message,
+                message: `Connot reach ${linkStatusObj.link}. Status: ${linkStatusObj.status_code}${linkStatusObj.error_message ? ` ${linkStatusObj.error_message}` : ''}`,
                 location: {
                   path: currentFile,
                   range: {
@@ -67,7 +67,7 @@ program
               spinner.stop();
               console.log(
                 kleur.red(
-                  `💥 ${currentFile} - Line ${linkStatusObj.line_number}: ${linkStatusObj.error_message}`
+                  `🚫 ${currentFile}, ${linkStatusObj.link} , ${linkStatusObj.status_code}, ${linkStatusObj.line_number}, ${linkStatusObj.error_message}`
                 )
               );
               spinner.start(`Checking ${currentFile}...\n`);
diff --git a/lib/extract-asciidoc-links.js b/lib/extract-asciidoc-links.js
new file mode 100644
index 0000000..fb11f6e
--- /dev/null
+++ b/lib/extract-asciidoc-links.js
@@ -0,0 +1,450 @@
+import fs from "fs";
+import readline from "readline";
+import { doReplacements } from "./handle-links-modification.js";
+
+function extractAsciiDocLinks(filePath, options) {
+  return new Promise((resolve) => {
+    const links = [];
+    const internalRefs = new Map();
+    const externalRefs = new Map();
+    const externalURLs = new Map();
+
+    let insideCommentBlock = false;
+
+    const rl = readline.createInterface({
+      input: fs.createReadStream(filePath),
+      crlfDelay: Infinity,
+    });
+
+    let lineNumber = 0;
+
+    const urlRegex =
+      /(?:https?|ftp|irc|mailto):\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g;
+
+    rl.on("line", (line) => {
+      lineNumber++;
+      // Ignore comment blocks
+      if (line.startsWith("////")) {
+        insideCommentBlock = !insideCommentBlock;
+      }
+      if (insideCommentBlock) {
+        return;
+      }
+      // Ignore single-line comments
+      if (line.startsWith("//")) {
+        return;
+      }
+      // Extract external hyperlinks
+      let match;
+      while ((match = urlRegex.exec(line)) !== null) {
+        const url = match[0].replace(/^link:/, ""); // Remove 'link:' prefix if present
+        const position = {
+          start: { line: lineNumber, column: match.index, offset: match.index },
+          end: {
+            line: lineNumber,
+            column: match.index + match[0].length,
+            offset: match.index + match[0].length,
+          },
+        };
+        const linkNode = {
+          type: "link",
+          title: null,
+          url,
+          children: [],
+          position,
+        };
+        const existingLink = links.find(
+          (link) =>
+            link.url === linkNode.url &&
+            link.position.start.line === linkNode.position.start.line &&
+            link.position.start.column === linkNode.position.start.column
+        );
+        if (!existingLink) {
+          links.push(linkNode); // Add link to the array only if it's not already there
+        }
+        continue;
+      }
+      // Extract internal and external references
+      if (line.match(/\[\[[^\]]+\]\]/g)) {
+        let extractLink = line.match(/\[\[[^\]]+\]\]/g);
+        for (let i = 0; i < extractLink.length; i++) {
+          let newAnchor = extractLink[i];
+          newAnchor = newAnchor.replace("[[", "");
+          newAnchor = newAnchor.replace("]]", "");
+          newAnchor = newAnchor.replace(/,.*/g, ""); // take into account ','
+          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const startColumn = matchIndex + 2; // Add 2 to account for the [[ characters
+          const endColumn = startColumn + newAnchor.length;
+          const startPosition = {
+            line: lineNumber,
+            column: startColumn,
+            offset: matchIndex,
+          };
+          const endPosition = {
+            line: lineNumber,
+            column: endColumn,
+            offset: matchIndex + newAnchor.length,
+          };
+          const position = {
+            start: startPosition,
+            end: endPosition,
+          };
+          const linkNode = {
+            type: "internal-ref",
+            title: null,
+            url: newAnchor,
+            children: [],
+            position,
+          };
+          internalRefs.set(newAnchor, linkNode);
+        }
+        return;
+      }
+      if (line.match(/^[\s]*[\*\-][\s]+\[\[\[[^\]]+\]\]\]/g)) {
+        let extractLink = line.match(/\[\[\[[^\]]+\]\]\]/g);
+        for (let i = 0; i < extractLink.length; i++) {
+          let newAnchor = extractLink[i];
+          newAnchor = newAnchor.replace("[[[", "");
+          newAnchor = newAnchor.replace("]]]", "");
+          newAnchor = newAnchor.replace(/,.*/g, ""); // take into account ','
+          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const startColumn = matchIndex + 4; // Add 4 to account for the [*-] and [[[ characters
+          const endColumn = startColumn + newAnchor.length;
+          const startPosition = {
+            line: lineNumber,
+            column: startColumn,
+            offset: matchIndex,
+          };
+          const endPosition = {
+            line: lineNumber,
+            column: endColumn,
+            offset: matchIndex + newAnchor.length,
+          };
+          const position = {
+            start: startPosition,
+            end: endPosition,
+          };
+          const linkNode = {
+            type: "internal-ref",
+            title: null,
+            url: newAnchor,
+            children: [],
+            position,
+          };
+          internalRefs.set(newAnchor, linkNode);
+        }
+        return;
+      }
+      if (line.match(/\[#[^\]]+\]/g)) {
+        let extractLink = line.match(/\[#[^\]]+\]/g);
+        for (let i = 0; i < extractLink.length; i++) {
+          let newAnchor = extractLink[i];
+          newAnchor = newAnchor.replace("[#", "");
+          newAnchor = newAnchor.replace("]", "");
+          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const startColumn = matchIndex + 2; // Add 2 to account for the [# characters
+          const endColumn = startColumn + newAnchor.length;
+          const startPosition = {
+            line: lineNumber,
+            column: startColumn,
+            offset: matchIndex,
+          };
+          const endPosition = {
+            line: lineNumber,
+            column: endColumn,
+            offset: matchIndex + newAnchor.length,
+          };
+          const position = {
+            start: startPosition,
+            end: endPosition,
+          };
+          const linkNode = {
+            type: "internal-ref",
+            title: null,
+            url: newAnchor,
+            children: [],
+            position,
+          };
+          internalRefs.set(newAnchor, linkNode);
+        }
+        return;
+      }
+      if (line.match(/(anchor:)[^\[]+\[[^\]]*\]/g)) {
+        let extractLink = line.match(/(anchor:)[^\[]+\[/g);
+        for (let i = 0; i < extractLink.length; i++) {
+          let newAnchor = extractLink[i];
+          newAnchor = newAnchor.replace("anchor:", "");
+          newAnchor = newAnchor.replace("[", "");
+
+          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const startColumn = matchIndex + 7; // Add 7 to account for the "anchor:" characters
+          const endColumn = startColumn + newAnchor.length;
+          const startPosition = {
+            line: lineNumber,
+            column: startColumn,
+            offset: matchIndex,
+          };
+          const endPosition = {
+            line: lineNumber,
+            column: endColumn,
+            offset: matchIndex + newAnchor.length,
+          };
+          const position = {
+            start: startPosition,
+            end: endPosition,
+          };
+          const linkNode = {
+            type: "internal-ref",
+            title: null,
+            url: newAnchor,
+            children: [],
+            position,
+          };
+          internalRefs.set(newAnchor, linkNode);
+        }
+        return;
+      }
+      if (line.match(/<<[^\>]+>>/g)) {
+        let extractLink = line.match(/<<[^\>]+>>/g);
+        for (let i = 0; i < extractLink.length; i++) {
+          let newReference = extractLink[i];
+          newReference = newReference.replace("<<", "");
+          newReference = newReference.replace(">>", "");
+          newReference = newReference.replace(/,.*/g, ""); // take into account <<anchor, some text>>
+          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const startColumn = matchIndex + 2; // Add 2 to account for the << characters
+          const endColumn = startColumn + newReference.length;
+          const startPosition = {
+            line: lineNumber,
+            column: startColumn,
+            offset: matchIndex,
+          };
+          const endPosition = {
+            line: lineNumber,
+            column: endColumn,
+            offset: matchIndex + newReference.length,
+          };
+          if (newReference.startsWith("#")) {
+            const position = {
+              start: startPosition,
+              end: endPosition,
+            };
+            const linkNode = {
+              type: "internal-ref",
+              title: null,
+              url: newReference,
+              children: [],
+              position,
+            };
+            internalRefs.set(newReference, linkNode);
+          } else if (newReference.match(/(\.adoc)|(\.asciidoc)|(\.asc)|(#)/g)) {
+            newReference = newReference.replace(
+              /(\.adoc|\.asciidoc|\.asc)(#)?/,
+              function (_, extension) {
+                return extension + "#";
+              }
+            );
+            const position = {
+              start: startPosition,
+              end: endPosition,
+            };
+            const linkNode = {
+              type: "external-ref",
+              title: null,
+              url: newReference,
+              children: [],
+              position,
+            };
+            externalRefs.set(newReference, linkNode);
+          } else {
+            const position = {
+              start: startPosition,
+              end: endPosition,
+            };
+            const linkNode = {
+              type: "internal-ref",
+              title: null,
+              url: newReference,
+              children: [],
+              position,
+            };
+            internalRefs.set(newReference, linkNode);
+          }
+        }
+        return;
+      }
+      if (line.match(/xref:[^\[]+\[[^\]]*\]/g)) {
+        let extractLink = line.match(/xref:[^\[]+\[/g);
+        for (let i = 0; i < extractLink.length; i++) {
+          let newReference = extractLink[i];
+          newReference = newReference.replace("xref:", "");
+          newReference = newReference.replace("[", "");
+          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const startColumn = matchIndex + 5; // Add 5 to account for the "xref:" characters
+          const endColumn = startColumn + newReference.length;
+          const startPosition = {
+            line: lineNumber,
+            column: startColumn,
+            offset: matchIndex,
+          };
+          const endPosition = {
+            line: lineNumber,
+            column: endColumn,
+            offset: matchIndex + newReference.length,
+          };
+          if (newReference.match(/(\.adoc)|(\.asciidoc)|(\.asc)|(#)/g)) {
+            newReference = newReference.replace(
+              /(\.adoc|\.asciidoc|\.asc)(#)?/,
+              function (_, extension) {
+                return extension + "#";
+              }
+            );
+            const position = {
+              start: startPosition,
+              end: endPosition,
+            };
+            const linkNode = {
+              type: "external-ref",
+              title: null,
+              url: newReference,
+              children: [],
+              position,
+            };
+            externalRefs.set(newReference, linkNode);
+          } else {
+            const position = {
+              start: startPosition,
+              end: endPosition,
+            };
+            const linkNode = {
+              type: "internal-ref",
+              title: null,
+              url: newReference,
+              children: [],
+              position,
+            };
+            internalRefs.set(newReference, linkNode);
+          }
+        }
+        return;
+      }
+      if (line.match(/link:[^\[]+\[[^\]]*\]/g)) {
+        let extractLink = line.match(/link:[^\[]+\[/g);
+        for (let i = 0; i < extractLink.length; i++) {
+          let newReference = extractLink[i];
+          newReference = newReference.replace("link:", "");
+          newReference = newReference.replace("[", "");
+          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const startColumn = matchIndex + 5; // Add 5 to account for the "link:" characters
+          const endColumn = startColumn + newReference.length;
+          const startPosition = {
+            line: lineNumber,
+            column: startColumn,
+            offset: matchIndex,
+          };
+          const endPosition = {
+            line: lineNumber,
+            column: endColumn,
+            offset: matchIndex + newReference.length,
+          };
+          if (newReference.match(/^(http|https):\/\//g)) {
+            const position = {
+              start: startPosition,
+              end: endPosition,
+            };
+            const linkNode = {
+              type: "link",
+              title: null,
+              url: newReference,
+              children: [],
+              position,
+            };
+            const existingLink = links.find(
+              (link) =>
+                link.url === linkNode.url &&
+                link.position.start.line === linkNode.position.start.line &&
+                link.position.start.column === linkNode.position.start.column
+            );
+            if (!existingLink) {
+              links.push(linkNode); // Add link to the array only if it's not already there
+            }
+          } else if (newReference.match(/^(ftp|irc|mailto):\/\//g)) {
+            // we currently don't handle these
+          } else {
+            newReference = newReference.replace(/(\.html?5?)#.*/, "$1");
+            const position = {
+              start: startPosition,
+              end: endPosition,
+            };
+            const linkNode = {
+              type: "external-ref",
+              title: null,
+              url: newReference,
+              children: [],
+              position,
+            };
+            externalRefs.set(newReference, linkNode);
+          }
+        }
+        return;
+      }
+      if (
+        line.match(
+          /(?:^|<|[\s>\(\)\[\];])((https?|file|ftp|irc):\/\/[^\s\[\]<]*[^\s.,\[\]<\)])/g
+        )
+      ) {
+        let extractLink = line.match(
+          /((https?|file|ftp|irc):\/\/[^\s\[\]<]*[^\s.,\[\]<\)])/g
+        );
+        for (let i = 0; i < extractLink.length; i++) {
+          let newReference = extractLink[i];
+          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const startColumn = matchIndex;
+          const endColumn = startColumn + newReference.length;
+          const startPosition = {
+            line: lineNumber,
+            column: startColumn,
+            offset: matchIndex,
+          };
+          const endPosition = {
+            line: lineNumber,
+            column: endColumn,
+            offset: matchIndex + newReference.length,
+          };
+          const position = {
+            start: startPosition,
+            end: endPosition,
+          };
+          const linkNode = {
+            type: "link",
+            title: null,
+            url: newReference,
+            children: [],
+            position,
+          };
+          const existingLink = links.find(
+            (link) =>
+              link.url === linkNode.url &&
+              link.position.start.line === linkNode.position.start.line &&
+              link.position.start.column === linkNode.position.start.column
+          );
+          if (!existingLink) {
+            links.push(linkNode); // Add link to the array only if it's not already there
+          }
+        }
+        return;
+      }
+    });
+    rl.on("close", () => {
+      const result = [
+        ...links.values(),
+        ...internalRefs.values(),
+        ...externalRefs.values(),
+        ...externalURLs.values(),
+      ];
+      resolve(doReplacements(result, options));
+    });
+  });
+}
+
+export { extractAsciiDocLinks };
diff --git a/lib/extract-markdown-hyperlinks.js b/lib/extract-markdown-hyperlinks.js
index 69b6abb..28f866c 100644
--- a/lib/extract-markdown-hyperlinks.js
+++ b/lib/extract-markdown-hyperlinks.js
@@ -2,6 +2,7 @@ import { unified } from "unified";
 import remarkParse from "remark-parse";
 import remarkGfm from "remark-gfm";
 import { visit } from "unist-util-visit";
+import { doReplacements } from "./handle-links-modification.js";
 
 //
 // Function: extractMarkdownHyperlinks
@@ -17,38 +18,14 @@ import { visit } from "unist-util-visit";
 // See https://github.com/syntax-tree/mdast for more information on the types of MDAST nodes
 //
 
-function extractMarkdownHyperlinks(markdownText, options = {}) {
-  const { ignorePatterns = [], replacementPatterns = [], baseUrl } = options;
-  const tree = unified()
-    .use(remarkParse)
-    .use(remarkGfm)
-    .parse(markdownText);
+function extractMarkdownHyperlinks(markdownText, options) {
+  const tree = unified().use(remarkParse).use(remarkGfm).parse(markdownText);
 
   const links = [];
-  visit(tree, ['link', 'definition', 'image'], (node) => {
-    let { url } = node;
-    // Skip link checking if it matches any ignore pattern
-    if (ignorePatterns.some(({ pattern }) => {
-      const regex = new RegExp(pattern);
-      return regex.test(url);
-    })) {
-      return;
-    }
-
-    // Prefix the base URL to URLs that start with '/'
-    if (baseUrl && url.startsWith('/')) {
-      url = baseUrl + url;
-    }
-
-    // Replace link URL based on replacement patterns
-    replacementPatterns.forEach(({ pattern, replacement }) => {
-      url = url.replace(new RegExp(pattern), replacement);
-    });
-    node.url = url;
-
+  visit(tree, ["link", "definition", "image"], (node) => {
     links.push(node);
   });
-  return links;
+  return doReplacements(links, options);
 }
 
 export { extractMarkdownHyperlinks };
diff --git a/lib/handle-links-modification.js b/lib/handle-links-modification.js
new file mode 100644
index 0000000..9238828
--- /dev/null
+++ b/lib/handle-links-modification.js
@@ -0,0 +1,42 @@
+/**
+ * Modifies the URLs of the given nodes based on the provided options.
+ *
+ * @param {Array} nodes - The nodes to be modified.
+ * @param {Object} [opts={}] - The options for modifying the URLs.
+ * @param {Array} [opts.ignorePatterns=[]] - The patterns of URLs to be ignored.
+ * @param {Array} [opts.replacementPatterns=[]] - The patterns of URLs to be replaced.
+ * @param {string} [opts.baseUrl] - The base URL to be prefixed to URLs that start with '/'.
+ *
+ * @returns {Array} The modified nodes.
+ */
+function doReplacements(nodes, opts = {}) {
+  const { ignorePatterns = [], replacementPatterns = [], baseUrl } = opts;
+
+  return nodes.filter((node) => {
+    let { url } = node;
+    // Skip link checking if it matches any ignore pattern
+    if (
+      ignorePatterns.some(({ pattern }) => {
+        const regex = new RegExp(pattern);
+        return regex.test(url);
+      })
+    ) {
+      return false; // Exclude this node
+    }
+
+    // Prefix the base URL to URLs that start with '/'
+    if (baseUrl && url.startsWith("/")) {
+      url = baseUrl + url;
+    }
+
+    // Replace link URL based on replacement patterns
+    replacementPatterns.forEach(({ pattern, replacement }) => {
+      url = url.replace(new RegExp(pattern), replacement);
+    });
+    node.url = url;
+
+    return true; // Include this node
+  });
+}
+
+export { doReplacements };
\ No newline at end of file
diff --git a/linkspector.js b/linkspector.js
index 531c2dd..010ab82 100644
--- a/linkspector.js
+++ b/linkspector.js
@@ -5,7 +5,7 @@ import yaml from "js-yaml";
 import { validateConfig } from "./lib/validate-config.js";
 import { prepareFilesList } from "./lib/prepare-file-list.js";
 import { extractMarkdownHyperlinks } from "./lib/extract-markdown-hyperlinks.js";
-import { extractAsciiDocLinks } from "./lib/extract-asciidoc-hyperlinks.js";
+import { extractAsciiDocLinks } from "./lib/extract-asciidoc-links.js";
 import { getUniqueLinks } from "./lib/get-unique-links.js";
 import { checkHyperlinks } from "./lib/batch-check-links.js";
 import { updateLinkStatusObj } from "./lib/update-linkstatus-obj.js";
@@ -127,7 +127,7 @@ export async function* linkspector(configFile, cmd) {
       config.fileExtensions &&
       config.fileExtensions.includes(fileExtension)
     ) {
-      astNodes = await extractAsciiDocLinks(file);
+      astNodes = await extractAsciiDocLinks(file, config);
     } else {
       const fileContent = readFileSync(file, "utf8");
       astNodes = extractMarkdownHyperlinks(fileContent, config);
diff --git a/test/fixtures/asciidoc/test1.adoc b/test/fixtures/asciidoc/test1.adoc
new file mode 100644
index 0000000..9dd3b53
--- /dev/null
+++ b/test/fixtures/asciidoc/test1.adoc
@@ -0,0 +1,3 @@
+Got to (http://www.yttftfftx.com) or [Google](http://www.google.com).
+
+Got to (http://www.yttftfftx.com) or [Google](http://www.google.com).
diff --git a/test/fixtures/asciidoc/test2.adoc b/test/fixtures/asciidoc/test2.adoc
new file mode 100644
index 0000000..78c0757
--- /dev/null
+++ b/test/fixtures/asciidoc/test2.adoc
@@ -0,0 +1 @@
+Got to (http://www.yttftfftx.com) or [Google](http://www.google.com).
\ No newline at end of file

From 9ff040f0b85525823866c25c1082cb10abea7eec Mon Sep 17 00:00:00 2001
From: Gaurav <gaurav-nelson@users.noreply.github.com>
Date: Sun, 12 May 2024 15:59:45 +1000
Subject: [PATCH 2/3] Finalized asciidoc hyperlink check, added test

---
 index.test.js                                 | 36 +++++++++++++++++++
 lib/update-linkstatus-obj.js                  | 18 ++++++++++
 linkspector.js                                |  3 --
 .../asciidoc/hyperlinks/hyperlinksTest.yml    |  5 +++
 .../testhyperlinks1.adoc}                     |  0
 test/fixtures/asciidoc/test2.adoc             |  1 -
 6 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml
 rename test/fixtures/asciidoc/{test1.adoc => hyperlinks/testhyperlinks1.adoc} (100%)
 delete mode 100644 test/fixtures/asciidoc/test2.adoc

diff --git a/index.test.js b/index.test.js
index c1ab741..9f663f2 100644
--- a/index.test.js
+++ b/index.test.js
@@ -148,3 +148,39 @@ test("linkspector should add back the removed duplicates when returning the resu
   expect(results[2].status).toBe("alive");
   expect(results[3].status).toBe("error");
 });
+
+test(" linkspector should check hyperlinks in AsciiDoc files", async () => {
+  let hasErrorLinks = false;
+  let currentFile = ""; // Variable to store the current file name
+  let results = []; // Array to store the results if json is true
+
+  for await (const { file, result } of linkspector(
+    "./test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml",
+    cmd
+  )) {
+    currentFile = file;
+    for (const linkStatusObj of result) {
+      if (cmd.json) {
+        results.push({
+          file: currentFile,
+          link: linkStatusObj.link,
+          status_code: linkStatusObj.status_code,
+          line_number: linkStatusObj.line_number,
+          position: linkStatusObj.position,
+          status: linkStatusObj.status,
+          error_message: linkStatusObj.error_message,
+        });
+      }
+      if (linkStatusObj.status === "error") {
+        hasErrorLinks = true;
+      }
+    }
+  }
+
+  expect(hasErrorLinks).toBe(true);
+  expect(results.length).toBe(4);
+  expect(results[0].status).toBe("error");
+  expect(results[1].status).toBe("alive");
+  expect(results[2].status).toBe("error");
+  expect(results[3].status).toBe("alive");
+});
\ No newline at end of file
diff --git a/lib/update-linkstatus-obj.js b/lib/update-linkstatus-obj.js
index 19381b5..41f102b 100644
--- a/lib/update-linkstatus-obj.js
+++ b/lib/update-linkstatus-obj.js
@@ -1,3 +1,15 @@
+/**
+ * Updates the link status object with the given AST nodes and existing link status.
+ *
+ * @param {Array} astNodes - The AST nodes to update the link status with.
+ * Each node is an object with properties `url`, `position`, `title`, and `children`.
+ *
+ * @param {Array} linkStatus - The existing link status to update.
+ * Each status is an object with properties `link`, `status`, `status_code`, `line_number`, `position`, `error_message`, `title`, and `children`.
+ *
+ * @returns {Array} The updated link status. Each status is an object with properties `link`, `status`, `status_code`, `line_number`, `position`, `error_message`, `title`, and `children`.
+ * The returned array is sorted by line number and start column in ascending order.
+ */
 "use strict";
 
 function updateLinkStatusObj(astNodes, linkStatus) {
@@ -32,6 +44,12 @@ function updateLinkStatusObj(astNodes, linkStatus) {
       });
     }
   });
+  updatedLinkStatus.sort((a, b) => {
+    if (a.position.start.line === b.position.start.line) {
+      return a.position.start.column - b.position.start.column;
+    }
+    return a.position.start.line - b.position.start.line;
+  });
   return updatedLinkStatus;
 }
 
diff --git a/linkspector.js b/linkspector.js
index 010ab82..0727365 100644
--- a/linkspector.js
+++ b/linkspector.js
@@ -109,9 +109,6 @@ export async function* linkspector(configFile, cmd) {
     filesToCheck = modifiedFilesToCheck;
   }
 
-  // Initialize an array to store link status objects
-  let linkStatusObjects = [];
-
   // Process each file
   for (const file of filesToCheck) {
     const relativeFilePath = path.relative(process.cwd(), file);
diff --git a/test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml b/test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml
new file mode 100644
index 0000000..90b24ba
--- /dev/null
+++ b/test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml
@@ -0,0 +1,5 @@
+dirs:
+  - ./test/fixtures/asciidoc/hyperlinks/
+fileExtensions:
+  - adoc
+useGitIgnore: true
diff --git a/test/fixtures/asciidoc/test1.adoc b/test/fixtures/asciidoc/hyperlinks/testhyperlinks1.adoc
similarity index 100%
rename from test/fixtures/asciidoc/test1.adoc
rename to test/fixtures/asciidoc/hyperlinks/testhyperlinks1.adoc
diff --git a/test/fixtures/asciidoc/test2.adoc b/test/fixtures/asciidoc/test2.adoc
deleted file mode 100644
index 78c0757..0000000
--- a/test/fixtures/asciidoc/test2.adoc
+++ /dev/null
@@ -1 +0,0 @@
-Got to (http://www.yttftfftx.com) or [Google](http://www.google.com).
\ No newline at end of file

From 9f40982d5a30057ea8d208ea7c3dd4f43726b4d2 Mon Sep 17 00:00:00 2001
From: Gaurav <gaurav-nelson@users.noreply.github.com>
Date: Sun, 12 May 2024 16:39:04 +1000
Subject: [PATCH 3/3] Fix escaping issues, remove unused file

---
 lib/extract-asciidoc-hyperlinks.js | 100 -----------------------------
 lib/extract-asciidoc-links.js      |  78 ++++++++++------------
 2 files changed, 35 insertions(+), 143 deletions(-)
 delete mode 100644 lib/extract-asciidoc-hyperlinks.js

diff --git a/lib/extract-asciidoc-hyperlinks.js b/lib/extract-asciidoc-hyperlinks.js
deleted file mode 100644
index 522cc16..0000000
--- a/lib/extract-asciidoc-hyperlinks.js
+++ /dev/null
@@ -1,100 +0,0 @@
-import fs from "fs";
-import readline from "readline";
-
-function extractAsciiDocLinks(filePath) {
-  return new Promise((resolve) => {
-    const links = [];
-
-    const rl = readline.createInterface({
-      input: fs.createReadStream(filePath),
-      crlfDelay: Infinity,
-    });
-
-    let lineNumber = 0;
-
-    // Updated regular expression to match only the URLs in the specified formats
-    const urlRegex =
-      /(?:https?|ftp|irc|mailto):\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g;
-
-    rl.on("line", (line) => {
-      lineNumber++;
-
-      let match;
-      while ((match = urlRegex.exec(line)) !== null) {
-        const url = match[0].replace(/^link:/, ""); // Remove 'link:' prefix if present
-        const position = {
-          start: { line: lineNumber, column: match.index, offset: match.index },
-          end: {
-            line: lineNumber,
-            column: match.index + match[0].length,
-            offset: match.index + match[0].length,
-          },
-        };
-
-        // Updated logic to extract the optional link text from the line
-        let title = null;
-        let children = [];
-        const linkTextRegex = /\[([^\]]+)\]/g; // Regular expression to match the link text inside brackets
-        linkTextRegex.lastIndex = position.end.offset; // Set the starting index to the end of the URL
-        const linkTextMatch = linkTextRegex.exec(line); // Try to find a link text after the URL
-        if (linkTextMatch) {
-          // If a link text is found, use it as the title and children value
-          title = linkTextMatch[1];
-          children.push({
-            type: "text",
-            value: title,
-            position: {
-              start: {
-                line: lineNumber,
-                column: linkTextMatch.index + 1,
-                offset: linkTextMatch.index + 1,
-              },
-              end: {
-                line: lineNumber,
-                column: linkTextMatch.index + linkTextMatch[0].length - 1,
-                offset: linkTextMatch.index + linkTextMatch[0].length - 1,
-              },
-            },
-          });
-          // Update the position end to include the link text
-          position.end.column += linkTextMatch[0].length;
-          position.end.offset += linkTextMatch[0].length;
-        } else {
-          // If no link text is found, use the URL as the children value
-          children.push({
-            type: "text",
-            value: url,
-            position: {
-              start: {
-                line: lineNumber,
-                column: match.index + 1,
-                offset: position.start.offset + 1,
-              },
-              end: {
-                line: lineNumber,
-                column: match.index + url.length + 1,
-                offset: position.start.offset + url.length + 1,
-              },
-            },
-          });
-        }
-
-        const linkNode = {
-          type: "link",
-          title: title,
-          url: url,
-          children: children,
-          position: position,
-        };
-
-        links.push(linkNode);
-      }
-    });
-
-    rl.on("close", () => {
-      resolve(links);
-    });
-  });
-}
-
-export { extractAsciiDocLinks };
diff --git a/lib/extract-asciidoc-links.js b/lib/extract-asciidoc-links.js
index fb11f6e..5c8b190 100644
--- a/lib/extract-asciidoc-links.js
+++ b/lib/extract-asciidoc-links.js
@@ -136,11 +136,9 @@ function extractAsciiDocLinks(filePath, options) {
         return;
       }
       if (line.match(/\[#[^\]]+\]/g)) {
-        let extractLink = line.match(/\[#[^\]]+\]/g);
-        for (let i = 0; i < extractLink.length; i++) {
-          let newAnchor = extractLink[i];
-          newAnchor = newAnchor.replace("[#", "");
-          newAnchor = newAnchor.replace("]", "");
+        const extractLink = line.match(/\[#[^\]]+\]/g);
+        extractLink.forEach((link) => {
+          const newAnchor = link.replace(/^\[#|]$/g, "");
           const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
           const startColumn = matchIndex + 2; // Add 2 to account for the [# characters
           const endColumn = startColumn + newAnchor.length;
@@ -166,17 +164,15 @@ function extractAsciiDocLinks(filePath, options) {
             position,
           };
           internalRefs.set(newAnchor, linkNode);
-        }
+        });
         return;
       }
-      if (line.match(/(anchor:)[^\[]+\[[^\]]*\]/g)) {
-        let extractLink = line.match(/(anchor:)[^\[]+\[/g);
-        for (let i = 0; i < extractLink.length; i++) {
-          let newAnchor = extractLink[i];
-          newAnchor = newAnchor.replace("anchor:", "");
-          newAnchor = newAnchor.replace("[", "");
+      if (line.match(/(anchor:[^\[]+)\[[^\]]*\]/g)) {
+        let extractLink = line.match(/(anchor:[^\[]+)\[[^\]]*\]/g);
+        extractLink.forEach((link) => {
+          let newAnchor = link.replace(/^anchor:|\[/g, "");
 
-          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+          const matchIndex = line.indexOf(link); // Get the index of the match
           const startColumn = matchIndex + 7; // Add 7 to account for the "anchor:" characters
           const endColumn = startColumn + newAnchor.length;
           const startPosition = {
@@ -201,7 +197,7 @@ function extractAsciiDocLinks(filePath, options) {
             position,
           };
           internalRefs.set(newAnchor, linkNode);
-        }
+        });
         return;
       }
       if (line.match(/<<[^\>]+>>/g)) {
@@ -274,12 +270,10 @@ function extractAsciiDocLinks(filePath, options) {
         return;
       }
       if (line.match(/xref:[^\[]+\[[^\]]*\]/g)) {
-        let extractLink = line.match(/xref:[^\[]+\[/g);
-        for (let i = 0; i < extractLink.length; i++) {
-          let newReference = extractLink[i];
-          newReference = newReference.replace("xref:", "");
-          newReference = newReference.replace("[", "");
-          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+        let extractLink = line.match(/xref:[^\[]+\[[^\]]*\]/g);
+        extractLink.forEach((link) => {
+          let newReference = link.replace(/^xref:|\[/g, "");
+          const matchIndex = line.indexOf(link); // Get the index of the match
           const startColumn = matchIndex + 5; // Add 5 to account for the "xref:" characters
           const endColumn = startColumn + newReference.length;
           const startPosition = {
@@ -295,9 +289,7 @@ function extractAsciiDocLinks(filePath, options) {
           if (newReference.match(/(\.adoc)|(\.asciidoc)|(\.asc)|(#)/g)) {
             newReference = newReference.replace(
               /(\.adoc|\.asciidoc|\.asc)(#)?/,
-              function (_, extension) {
-                return extension + "#";
-              }
+              (_, extension) => extension + "#"
             );
             const position = {
               start: startPosition,
@@ -325,16 +317,14 @@ function extractAsciiDocLinks(filePath, options) {
             };
             internalRefs.set(newReference, linkNode);
           }
-        }
+        });
         return;
       }
       if (line.match(/link:[^\[]+\[[^\]]*\]/g)) {
-        let extractLink = line.match(/link:[^\[]+\[/g);
-        for (let i = 0; i < extractLink.length; i++) {
-          let newReference = extractLink[i];
-          newReference = newReference.replace("link:", "");
-          newReference = newReference.replace("[", "");
-          const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match
+        let extractLink = line.match(/link:[^\[]+\[[^\]]*\]/g);
+        extractLink.forEach((link) => {
+          let newReference = link.replace(/^link:|\[/g, "");
+          const matchIndex = line.indexOf(link); // Get the index of the match
           const startColumn = matchIndex + 5; // Add 5 to account for the "link:" characters
           const endColumn = startColumn + newReference.length;
           const startPosition = {
@@ -347,29 +337,31 @@ function extractAsciiDocLinks(filePath, options) {
             column: endColumn,
             offset: matchIndex + newReference.length,
           };
-          if (newReference.match(/^(http|https):\/\//g)) {
+          if (newReference.match(/^(https?:\/\/|ftp|irc|mailto):\/\//g)) {
             const position = {
               start: startPosition,
               end: endPosition,
             };
             const linkNode = {
-              type: "link",
+              type: newReference.startsWith("http") ? "link" : "external-ref",
               title: null,
               url: newReference,
               children: [],
               position,
             };
-            const existingLink = links.find(
-              (link) =>
-                link.url === linkNode.url &&
-                link.position.start.line === linkNode.position.start.line &&
-                link.position.start.column === linkNode.position.start.column
-            );
-            if (!existingLink) {
-              links.push(linkNode); // Add link to the array only if it's not already there
+            if (linkNode.type === "link") {
+              const existingLink = links.find(
+                (link) =>
+                  link.url === linkNode.url &&
+                  link.position.start.line === linkNode.position.start.line &&
+                  link.position.start.column === linkNode.position.start.column
+              );
+              if (!existingLink) {
+                links.push(linkNode); // Add link to the array only if it's not already there
+              }
+            } else {
+              externalRefs.set(newReference, linkNode);
             }
-          } else if (newReference.match(/^(ftp|irc|mailto):\/\//g)) {
-            // we currently don't handle these
           } else {
             newReference = newReference.replace(/(\.html?5?)#.*/, "$1");
             const position = {
@@ -385,7 +377,7 @@ function extractAsciiDocLinks(filePath, options) {
             };
             externalRefs.set(newReference, linkNode);
           }
-        }
+        });
         return;
       }
       if (