From f416134541d20afb93fc30c677c1966e6ade7054 Mon Sep 17 00:00:00 2001 From: Gaurav Date: Sun, 12 May 2024 10:21:08 +1000 Subject: [PATCH 1/3] Added asciidoc extraction capabilities, fixed asciidoc hyperlinks check --- index.js | 4 +- lib/extract-asciidoc-links.js | 450 +++++++++++++++++++++++++++++ lib/extract-markdown-hyperlinks.js | 33 +-- lib/handle-links-modification.js | 42 +++ linkspector.js | 4 +- test/fixtures/asciidoc/test1.adoc | 3 + test/fixtures/asciidoc/test2.adoc | 1 + 7 files changed, 505 insertions(+), 32 deletions(-) create mode 100644 lib/extract-asciidoc-links.js create mode 100644 lib/handle-links-modification.js create mode 100644 test/fixtures/asciidoc/test1.adoc create mode 100644 test/fixtures/asciidoc/test2.adoc diff --git a/index.js b/index.js index eb33047..a5feb4d 100755 --- a/index.js +++ b/index.js @@ -46,7 +46,7 @@ program if (linkStatusObj.status === "error") { if (cmd.json) { results.diagnostics.push({ - message: linkStatusObj.error_message, + message: `Connot reach ${linkStatusObj.link}. Status: ${linkStatusObj.status_code}${linkStatusObj.error_message ? ` ${linkStatusObj.error_message}` : ''}`, location: { path: currentFile, range: { @@ -67,7 +67,7 @@ program spinner.stop(); console.log( kleur.red( - `💥 ${currentFile} - Line ${linkStatusObj.line_number}: ${linkStatusObj.error_message}` + `🚫 ${currentFile}, ${linkStatusObj.link} , ${linkStatusObj.status_code}, ${linkStatusObj.line_number}, ${linkStatusObj.error_message}` ) ); spinner.start(`Checking ${currentFile}...\n`); diff --git a/lib/extract-asciidoc-links.js b/lib/extract-asciidoc-links.js new file mode 100644 index 0000000..fb11f6e --- /dev/null +++ b/lib/extract-asciidoc-links.js @@ -0,0 +1,450 @@ +import fs from "fs"; +import readline from "readline"; +import { doReplacements } from "./handle-links-modification.js"; + +function extractAsciiDocLinks(filePath, options) { + return new Promise((resolve) => { + const links = []; + const internalRefs = new Map(); + const externalRefs = new Map(); + const externalURLs = new Map(); + + let insideCommentBlock = false; + + const rl = readline.createInterface({ + input: fs.createReadStream(filePath), + crlfDelay: Infinity, + }); + + let lineNumber = 0; + + const urlRegex = + /(?:https?|ftp|irc|mailto):\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g; + + rl.on("line", (line) => { + lineNumber++; + // Ignore comment blocks + if (line.startsWith("////")) { + insideCommentBlock = !insideCommentBlock; + } + if (insideCommentBlock) { + return; + } + // Ignore single-line comments + if (line.startsWith("//")) { + return; + } + // Extract external hyperlinks + let match; + while ((match = urlRegex.exec(line)) !== null) { + const url = match[0].replace(/^link:/, ""); // Remove 'link:' prefix if present + const position = { + start: { line: lineNumber, column: match.index, offset: match.index }, + end: { + line: lineNumber, + column: match.index + match[0].length, + offset: match.index + match[0].length, + }, + }; + const linkNode = { + type: "link", + title: null, + url, + children: [], + position, + }; + const existingLink = links.find( + (link) => + link.url === linkNode.url && + link.position.start.line === linkNode.position.start.line && + link.position.start.column === linkNode.position.start.column + ); + if (!existingLink) { + links.push(linkNode); // Add link to the array only if it's not already there + } + continue; + } + // Extract internal and external references + if (line.match(/\[\[[^\]]+\]\]/g)) { + let extractLink = line.match(/\[\[[^\]]+\]\]/g); + for (let i = 0; i < extractLink.length; i++) { + let newAnchor = extractLink[i]; + newAnchor = newAnchor.replace("[[", ""); + newAnchor = newAnchor.replace("]]", ""); + newAnchor = newAnchor.replace(/,.*/g, ""); // take into account ',' + const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const startColumn = matchIndex + 2; // Add 2 to account for the [[ characters + const endColumn = startColumn + newAnchor.length; + const startPosition = { + line: lineNumber, + column: startColumn, + offset: matchIndex, + }; + const endPosition = { + line: lineNumber, + column: endColumn, + offset: matchIndex + newAnchor.length, + }; + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "internal-ref", + title: null, + url: newAnchor, + children: [], + position, + }; + internalRefs.set(newAnchor, linkNode); + } + return; + } + if (line.match(/^[\s]*[\*\-][\s]+\[\[\[[^\]]+\]\]\]/g)) { + let extractLink = line.match(/\[\[\[[^\]]+\]\]\]/g); + for (let i = 0; i < extractLink.length; i++) { + let newAnchor = extractLink[i]; + newAnchor = newAnchor.replace("[[[", ""); + newAnchor = newAnchor.replace("]]]", ""); + newAnchor = newAnchor.replace(/,.*/g, ""); // take into account ',' + const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const startColumn = matchIndex + 4; // Add 4 to account for the [*-] and [[[ characters + const endColumn = startColumn + newAnchor.length; + const startPosition = { + line: lineNumber, + column: startColumn, + offset: matchIndex, + }; + const endPosition = { + line: lineNumber, + column: endColumn, + offset: matchIndex + newAnchor.length, + }; + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "internal-ref", + title: null, + url: newAnchor, + children: [], + position, + }; + internalRefs.set(newAnchor, linkNode); + } + return; + } + if (line.match(/\[#[^\]]+\]/g)) { + let extractLink = line.match(/\[#[^\]]+\]/g); + for (let i = 0; i < extractLink.length; i++) { + let newAnchor = extractLink[i]; + newAnchor = newAnchor.replace("[#", ""); + newAnchor = newAnchor.replace("]", ""); + const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const startColumn = matchIndex + 2; // Add 2 to account for the [# characters + const endColumn = startColumn + newAnchor.length; + const startPosition = { + line: lineNumber, + column: startColumn, + offset: matchIndex, + }; + const endPosition = { + line: lineNumber, + column: endColumn, + offset: matchIndex + newAnchor.length, + }; + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "internal-ref", + title: null, + url: newAnchor, + children: [], + position, + }; + internalRefs.set(newAnchor, linkNode); + } + return; + } + if (line.match(/(anchor:)[^\[]+\[[^\]]*\]/g)) { + let extractLink = line.match(/(anchor:)[^\[]+\[/g); + for (let i = 0; i < extractLink.length; i++) { + let newAnchor = extractLink[i]; + newAnchor = newAnchor.replace("anchor:", ""); + newAnchor = newAnchor.replace("[", ""); + + const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const startColumn = matchIndex + 7; // Add 7 to account for the "anchor:" characters + const endColumn = startColumn + newAnchor.length; + const startPosition = { + line: lineNumber, + column: startColumn, + offset: matchIndex, + }; + const endPosition = { + line: lineNumber, + column: endColumn, + offset: matchIndex + newAnchor.length, + }; + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "internal-ref", + title: null, + url: newAnchor, + children: [], + position, + }; + internalRefs.set(newAnchor, linkNode); + } + return; + } + if (line.match(/<<[^\>]+>>/g)) { + let extractLink = line.match(/<<[^\>]+>>/g); + for (let i = 0; i < extractLink.length; i++) { + let newReference = extractLink[i]; + newReference = newReference.replace("<<", ""); + newReference = newReference.replace(">>", ""); + newReference = newReference.replace(/,.*/g, ""); // take into account <> + const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const startColumn = matchIndex + 2; // Add 2 to account for the << characters + const endColumn = startColumn + newReference.length; + const startPosition = { + line: lineNumber, + column: startColumn, + offset: matchIndex, + }; + const endPosition = { + line: lineNumber, + column: endColumn, + offset: matchIndex + newReference.length, + }; + if (newReference.startsWith("#")) { + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "internal-ref", + title: null, + url: newReference, + children: [], + position, + }; + internalRefs.set(newReference, linkNode); + } else if (newReference.match(/(\.adoc)|(\.asciidoc)|(\.asc)|(#)/g)) { + newReference = newReference.replace( + /(\.adoc|\.asciidoc|\.asc)(#)?/, + function (_, extension) { + return extension + "#"; + } + ); + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "external-ref", + title: null, + url: newReference, + children: [], + position, + }; + externalRefs.set(newReference, linkNode); + } else { + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "internal-ref", + title: null, + url: newReference, + children: [], + position, + }; + internalRefs.set(newReference, linkNode); + } + } + return; + } + if (line.match(/xref:[^\[]+\[[^\]]*\]/g)) { + let extractLink = line.match(/xref:[^\[]+\[/g); + for (let i = 0; i < extractLink.length; i++) { + let newReference = extractLink[i]; + newReference = newReference.replace("xref:", ""); + newReference = newReference.replace("[", ""); + const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const startColumn = matchIndex + 5; // Add 5 to account for the "xref:" characters + const endColumn = startColumn + newReference.length; + const startPosition = { + line: lineNumber, + column: startColumn, + offset: matchIndex, + }; + const endPosition = { + line: lineNumber, + column: endColumn, + offset: matchIndex + newReference.length, + }; + if (newReference.match(/(\.adoc)|(\.asciidoc)|(\.asc)|(#)/g)) { + newReference = newReference.replace( + /(\.adoc|\.asciidoc|\.asc)(#)?/, + function (_, extension) { + return extension + "#"; + } + ); + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "external-ref", + title: null, + url: newReference, + children: [], + position, + }; + externalRefs.set(newReference, linkNode); + } else { + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "internal-ref", + title: null, + url: newReference, + children: [], + position, + }; + internalRefs.set(newReference, linkNode); + } + } + return; + } + if (line.match(/link:[^\[]+\[[^\]]*\]/g)) { + let extractLink = line.match(/link:[^\[]+\[/g); + for (let i = 0; i < extractLink.length; i++) { + let newReference = extractLink[i]; + newReference = newReference.replace("link:", ""); + newReference = newReference.replace("[", ""); + const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const startColumn = matchIndex + 5; // Add 5 to account for the "link:" characters + const endColumn = startColumn + newReference.length; + const startPosition = { + line: lineNumber, + column: startColumn, + offset: matchIndex, + }; + const endPosition = { + line: lineNumber, + column: endColumn, + offset: matchIndex + newReference.length, + }; + if (newReference.match(/^(http|https):\/\//g)) { + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "link", + title: null, + url: newReference, + children: [], + position, + }; + const existingLink = links.find( + (link) => + link.url === linkNode.url && + link.position.start.line === linkNode.position.start.line && + link.position.start.column === linkNode.position.start.column + ); + if (!existingLink) { + links.push(linkNode); // Add link to the array only if it's not already there + } + } else if (newReference.match(/^(ftp|irc|mailto):\/\//g)) { + // we currently don't handle these + } else { + newReference = newReference.replace(/(\.html?5?)#.*/, "$1"); + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "external-ref", + title: null, + url: newReference, + children: [], + position, + }; + externalRefs.set(newReference, linkNode); + } + } + return; + } + if ( + line.match( + /(?:^|<|[\s>\(\)\[\];])((https?|file|ftp|irc):\/\/[^\s\[\]<]*[^\s.,\[\]<\)])/g + ) + ) { + let extractLink = line.match( + /((https?|file|ftp|irc):\/\/[^\s\[\]<]*[^\s.,\[\]<\)])/g + ); + for (let i = 0; i < extractLink.length; i++) { + let newReference = extractLink[i]; + const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const startColumn = matchIndex; + const endColumn = startColumn + newReference.length; + const startPosition = { + line: lineNumber, + column: startColumn, + offset: matchIndex, + }; + const endPosition = { + line: lineNumber, + column: endColumn, + offset: matchIndex + newReference.length, + }; + const position = { + start: startPosition, + end: endPosition, + }; + const linkNode = { + type: "link", + title: null, + url: newReference, + children: [], + position, + }; + const existingLink = links.find( + (link) => + link.url === linkNode.url && + link.position.start.line === linkNode.position.start.line && + link.position.start.column === linkNode.position.start.column + ); + if (!existingLink) { + links.push(linkNode); // Add link to the array only if it's not already there + } + } + return; + } + }); + rl.on("close", () => { + const result = [ + ...links.values(), + ...internalRefs.values(), + ...externalRefs.values(), + ...externalURLs.values(), + ]; + resolve(doReplacements(result, options)); + }); + }); +} + +export { extractAsciiDocLinks }; diff --git a/lib/extract-markdown-hyperlinks.js b/lib/extract-markdown-hyperlinks.js index 69b6abb..28f866c 100644 --- a/lib/extract-markdown-hyperlinks.js +++ b/lib/extract-markdown-hyperlinks.js @@ -2,6 +2,7 @@ import { unified } from "unified"; import remarkParse from "remark-parse"; import remarkGfm from "remark-gfm"; import { visit } from "unist-util-visit"; +import { doReplacements } from "./handle-links-modification.js"; // // Function: extractMarkdownHyperlinks @@ -17,38 +18,14 @@ import { visit } from "unist-util-visit"; // See https://github.com/syntax-tree/mdast for more information on the types of MDAST nodes // -function extractMarkdownHyperlinks(markdownText, options = {}) { - const { ignorePatterns = [], replacementPatterns = [], baseUrl } = options; - const tree = unified() - .use(remarkParse) - .use(remarkGfm) - .parse(markdownText); +function extractMarkdownHyperlinks(markdownText, options) { + const tree = unified().use(remarkParse).use(remarkGfm).parse(markdownText); const links = []; - visit(tree, ['link', 'definition', 'image'], (node) => { - let { url } = node; - // Skip link checking if it matches any ignore pattern - if (ignorePatterns.some(({ pattern }) => { - const regex = new RegExp(pattern); - return regex.test(url); - })) { - return; - } - - // Prefix the base URL to URLs that start with '/' - if (baseUrl && url.startsWith('/')) { - url = baseUrl + url; - } - - // Replace link URL based on replacement patterns - replacementPatterns.forEach(({ pattern, replacement }) => { - url = url.replace(new RegExp(pattern), replacement); - }); - node.url = url; - + visit(tree, ["link", "definition", "image"], (node) => { links.push(node); }); - return links; + return doReplacements(links, options); } export { extractMarkdownHyperlinks }; diff --git a/lib/handle-links-modification.js b/lib/handle-links-modification.js new file mode 100644 index 0000000..9238828 --- /dev/null +++ b/lib/handle-links-modification.js @@ -0,0 +1,42 @@ +/** + * Modifies the URLs of the given nodes based on the provided options. + * + * @param {Array} nodes - The nodes to be modified. + * @param {Object} [opts={}] - The options for modifying the URLs. + * @param {Array} [opts.ignorePatterns=[]] - The patterns of URLs to be ignored. + * @param {Array} [opts.replacementPatterns=[]] - The patterns of URLs to be replaced. + * @param {string} [opts.baseUrl] - The base URL to be prefixed to URLs that start with '/'. + * + * @returns {Array} The modified nodes. + */ +function doReplacements(nodes, opts = {}) { + const { ignorePatterns = [], replacementPatterns = [], baseUrl } = opts; + + return nodes.filter((node) => { + let { url } = node; + // Skip link checking if it matches any ignore pattern + if ( + ignorePatterns.some(({ pattern }) => { + const regex = new RegExp(pattern); + return regex.test(url); + }) + ) { + return false; // Exclude this node + } + + // Prefix the base URL to URLs that start with '/' + if (baseUrl && url.startsWith("/")) { + url = baseUrl + url; + } + + // Replace link URL based on replacement patterns + replacementPatterns.forEach(({ pattern, replacement }) => { + url = url.replace(new RegExp(pattern), replacement); + }); + node.url = url; + + return true; // Include this node + }); +} + +export { doReplacements }; \ No newline at end of file diff --git a/linkspector.js b/linkspector.js index 531c2dd..010ab82 100644 --- a/linkspector.js +++ b/linkspector.js @@ -5,7 +5,7 @@ import yaml from "js-yaml"; import { validateConfig } from "./lib/validate-config.js"; import { prepareFilesList } from "./lib/prepare-file-list.js"; import { extractMarkdownHyperlinks } from "./lib/extract-markdown-hyperlinks.js"; -import { extractAsciiDocLinks } from "./lib/extract-asciidoc-hyperlinks.js"; +import { extractAsciiDocLinks } from "./lib/extract-asciidoc-links.js"; import { getUniqueLinks } from "./lib/get-unique-links.js"; import { checkHyperlinks } from "./lib/batch-check-links.js"; import { updateLinkStatusObj } from "./lib/update-linkstatus-obj.js"; @@ -127,7 +127,7 @@ export async function* linkspector(configFile, cmd) { config.fileExtensions && config.fileExtensions.includes(fileExtension) ) { - astNodes = await extractAsciiDocLinks(file); + astNodes = await extractAsciiDocLinks(file, config); } else { const fileContent = readFileSync(file, "utf8"); astNodes = extractMarkdownHyperlinks(fileContent, config); diff --git a/test/fixtures/asciidoc/test1.adoc b/test/fixtures/asciidoc/test1.adoc new file mode 100644 index 0000000..9dd3b53 --- /dev/null +++ b/test/fixtures/asciidoc/test1.adoc @@ -0,0 +1,3 @@ +Got to (http://www.yttftfftx.com) or [Google](http://www.google.com). + +Got to (http://www.yttftfftx.com) or [Google](http://www.google.com). diff --git a/test/fixtures/asciidoc/test2.adoc b/test/fixtures/asciidoc/test2.adoc new file mode 100644 index 0000000..78c0757 --- /dev/null +++ b/test/fixtures/asciidoc/test2.adoc @@ -0,0 +1 @@ +Got to (http://www.yttftfftx.com) or [Google](http://www.google.com). \ No newline at end of file From 9ff040f0b85525823866c25c1082cb10abea7eec Mon Sep 17 00:00:00 2001 From: Gaurav Date: Sun, 12 May 2024 15:59:45 +1000 Subject: [PATCH 2/3] Finialized asciidoc hyperlink check, added test --- index.test.js | 36 +++++++++++++++++++ lib/update-linkstatus-obj.js | 18 ++++++++++ linkspector.js | 3 -- .../asciidoc/hyperlinks/hyperlinksTest.yml | 5 +++ .../testhyperlinks1.adoc} | 0 test/fixtures/asciidoc/test2.adoc | 1 - 6 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml rename test/fixtures/asciidoc/{test1.adoc => hyperlinks/testhyperlinks1.adoc} (100%) delete mode 100644 test/fixtures/asciidoc/test2.adoc diff --git a/index.test.js b/index.test.js index c1ab741..9f663f2 100644 --- a/index.test.js +++ b/index.test.js @@ -148,3 +148,39 @@ test("linkspector should add back the removed duplicates when returning the resu expect(results[2].status).toBe("alive"); expect(results[3].status).toBe("error"); }); + +test(" linkspector should check hyperlinks in AsciiDoc files", async () => { + let hasErrorLinks = false; + let currentFile = ""; // Variable to store the current file name + let results = []; // Array to store the results if json is true + + for await (const { file, result } of linkspector( + "./test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml", + cmd + )) { + currentFile = file; + for (const linkStatusObj of result) { + if (cmd.json) { + results.push({ + file: currentFile, + link: linkStatusObj.link, + status_code: linkStatusObj.status_code, + line_number: linkStatusObj.line_number, + position: linkStatusObj.position, + status: linkStatusObj.status, + error_message: linkStatusObj.error_message, + }); + } + if (linkStatusObj.status === "error") { + hasErrorLinks = true; + } + } + } + + expect(hasErrorLinks).toBe(true); + expect(results.length).toBe(4); + expect(results[0].status).toBe("error"); + expect(results[1].status).toBe("alive"); + expect(results[2].status).toBe("error"); + expect(results[3].status).toBe("alive"); +}); \ No newline at end of file diff --git a/lib/update-linkstatus-obj.js b/lib/update-linkstatus-obj.js index 19381b5..41f102b 100644 --- a/lib/update-linkstatus-obj.js +++ b/lib/update-linkstatus-obj.js @@ -1,3 +1,15 @@ +/** + * Updates the link status object with the given AST nodes and existing link status. + * + * @param {Array} astNodes - The AST nodes to update the link status with. + * Each node is an object with properties `url`, `position`, `title`, and `children`. + * + * @param {Array} linkStatus - The existing link status to update. + * Each status is an object with properties `link`, `status`, `status_code`, `line_number`, `position`, `error_message`, `title`, and `children`. + * + * @returns {Array} The updated link status. Each status is an object with properties `link`, `status`, `status_code`, `line_number`, `position`, `error_message`, `title`, and `children`. + * The returned array is sorted by line number and start column in ascending order. + */ "use strict"; function updateLinkStatusObj(astNodes, linkStatus) { @@ -32,6 +44,12 @@ function updateLinkStatusObj(astNodes, linkStatus) { }); } }); + updatedLinkStatus.sort((a, b) => { + if (a.position.start.line === b.position.start.line) { + return a.position.start.column - b.position.start.column; + } + return a.position.start.line - b.position.start.line; + }); return updatedLinkStatus; } diff --git a/linkspector.js b/linkspector.js index 010ab82..0727365 100644 --- a/linkspector.js +++ b/linkspector.js @@ -109,9 +109,6 @@ export async function* linkspector(configFile, cmd) { filesToCheck = modifiedFilesToCheck; } - // Initialize an array to store link status objects - let linkStatusObjects = []; - // Process each file for (const file of filesToCheck) { const relativeFilePath = path.relative(process.cwd(), file); diff --git a/test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml b/test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml new file mode 100644 index 0000000..90b24ba --- /dev/null +++ b/test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml @@ -0,0 +1,5 @@ +dirs: + - ./test/fixtures/asciidoc/hyperlinks/ +fileExtensions: + - adoc +useGitIgnore: true diff --git a/test/fixtures/asciidoc/test1.adoc b/test/fixtures/asciidoc/hyperlinks/testhyperlinks1.adoc similarity index 100% rename from test/fixtures/asciidoc/test1.adoc rename to test/fixtures/asciidoc/hyperlinks/testhyperlinks1.adoc diff --git a/test/fixtures/asciidoc/test2.adoc b/test/fixtures/asciidoc/test2.adoc deleted file mode 100644 index 78c0757..0000000 --- a/test/fixtures/asciidoc/test2.adoc +++ /dev/null @@ -1 +0,0 @@ -Got to (http://www.yttftfftx.com) or [Google](http://www.google.com). \ No newline at end of file From 9f40982d5a30057ea8d208ea7c3dd4f43726b4d2 Mon Sep 17 00:00:00 2001 From: Gaurav Date: Sun, 12 May 2024 16:39:04 +1000 Subject: [PATCH 3/3] Fix escaping issues, remove unused file --- lib/extract-asciidoc-hyperlinks.js | 100 ----------------------------- lib/extract-asciidoc-links.js | 78 ++++++++++------------ 2 files changed, 35 insertions(+), 143 deletions(-) delete mode 100644 lib/extract-asciidoc-hyperlinks.js diff --git a/lib/extract-asciidoc-hyperlinks.js b/lib/extract-asciidoc-hyperlinks.js deleted file mode 100644 index 522cc16..0000000 --- a/lib/extract-asciidoc-hyperlinks.js +++ /dev/null @@ -1,100 +0,0 @@ -import fs from "fs"; -import readline from "readline"; - -function extractAsciiDocLinks(filePath) { - return new Promise((resolve) => { - const links = []; - - const rl = readline.createInterface({ - input: fs.createReadStream(filePath), - crlfDelay: Infinity, - }); - - let lineNumber = 0; - - // Updated regular expression to match only the URLs in the specified formats - const urlRegex = - /(?:https?|ftp|irc|mailto):\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g; - - rl.on("line", (line) => { - lineNumber++; - - let match; - while ((match = urlRegex.exec(line)) !== null) { - const url = match[0].replace(/^link:/, ""); // Remove 'link:' prefix if present - const position = { - start: { line: lineNumber, column: match.index, offset: match.index }, - end: { - line: lineNumber, - column: match.index + match[0].length, - offset: match.index + match[0].length, - }, - }; - - // Updated logic to extract the optional link text from the line - let title = null; - let children = []; - const linkTextRegex = /\[([^\]]+)\]/g; // Regular expression to match the link text inside brackets - linkTextRegex.lastIndex = position.end.offset; // Set the starting index to the end of the URL - const linkTextMatch = linkTextRegex.exec(line); // Try to find a link text after the URL - if (linkTextMatch) { - // If a link text is found, use it as the title and children value - title = linkTextMatch[1]; - children.push({ - type: "text", - value: title, - position: { - start: { - line: lineNumber, - column: linkTextMatch.index + 1, - offset: linkTextMatch.index + 1, - }, - end: { - line: lineNumber, - column: linkTextMatch.index + linkTextMatch[0].length - 1, - offset: linkTextMatch.index + linkTextMatch[0].length - 1, - }, - }, - }); - // Update the position end to include the link text - position.end.column += linkTextMatch[0].length; - position.end.offset += linkTextMatch[0].length; - } else { - // If no link text is found, use the URL as the children value - children.push({ - type: "text", - value: url, - position: { - start: { - line: lineNumber, - column: match.index + 1, - offset: position.start.offset + 1, - }, - end: { - line: lineNumber, - column: match.index + url.length + 1, - offset: position.start.offset + url.length + 1, - }, - }, - }); - } - - const linkNode = { - type: "link", - title: title, - url: url, - children: children, - position: position, - }; - - links.push(linkNode); - } - }); - - rl.on("close", () => { - resolve(links); - }); - }); -} - -export { extractAsciiDocLinks }; diff --git a/lib/extract-asciidoc-links.js b/lib/extract-asciidoc-links.js index fb11f6e..5c8b190 100644 --- a/lib/extract-asciidoc-links.js +++ b/lib/extract-asciidoc-links.js @@ -136,11 +136,9 @@ function extractAsciiDocLinks(filePath, options) { return; } if (line.match(/\[#[^\]]+\]/g)) { - let extractLink = line.match(/\[#[^\]]+\]/g); - for (let i = 0; i < extractLink.length; i++) { - let newAnchor = extractLink[i]; - newAnchor = newAnchor.replace("[#", ""); - newAnchor = newAnchor.replace("]", ""); + const extractLink = line.match(/\[#[^\]]+\]/g); + extractLink.forEach((link) => { + const newAnchor = link.replace(/^\[#|]$/g, ""); const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match const startColumn = matchIndex + 2; // Add 2 to account for the [# characters const endColumn = startColumn + newAnchor.length; @@ -166,17 +164,15 @@ function extractAsciiDocLinks(filePath, options) { position, }; internalRefs.set(newAnchor, linkNode); - } + }); return; } - if (line.match(/(anchor:)[^\[]+\[[^\]]*\]/g)) { - let extractLink = line.match(/(anchor:)[^\[]+\[/g); - for (let i = 0; i < extractLink.length; i++) { - let newAnchor = extractLink[i]; - newAnchor = newAnchor.replace("anchor:", ""); - newAnchor = newAnchor.replace("[", ""); + if (line.match(/(anchor:[^\[]+)\[[^\]]*\]/g)) { + let extractLink = line.match(/(anchor:[^\[]+)\[[^\]]*\]/g); + extractLink.forEach((link) => { + let newAnchor = link.replace(/^anchor:|\[/g, ""); - const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + const matchIndex = line.indexOf(link); // Get the index of the match const startColumn = matchIndex + 7; // Add 7 to account for the "anchor:" characters const endColumn = startColumn + newAnchor.length; const startPosition = { @@ -201,7 +197,7 @@ function extractAsciiDocLinks(filePath, options) { position, }; internalRefs.set(newAnchor, linkNode); - } + }); return; } if (line.match(/<<[^\>]+>>/g)) { @@ -274,12 +270,10 @@ function extractAsciiDocLinks(filePath, options) { return; } if (line.match(/xref:[^\[]+\[[^\]]*\]/g)) { - let extractLink = line.match(/xref:[^\[]+\[/g); - for (let i = 0; i < extractLink.length; i++) { - let newReference = extractLink[i]; - newReference = newReference.replace("xref:", ""); - newReference = newReference.replace("[", ""); - const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + let extractLink = line.match(/xref:[^\[]+\[[^\]]*\]/g); + extractLink.forEach((link) => { + let newReference = link.replace(/^xref:|\[/g, ""); + const matchIndex = line.indexOf(link); // Get the index of the match const startColumn = matchIndex + 5; // Add 5 to account for the "xref:" characters const endColumn = startColumn + newReference.length; const startPosition = { @@ -295,9 +289,7 @@ function extractAsciiDocLinks(filePath, options) { if (newReference.match(/(\.adoc)|(\.asciidoc)|(\.asc)|(#)/g)) { newReference = newReference.replace( /(\.adoc|\.asciidoc|\.asc)(#)?/, - function (_, extension) { - return extension + "#"; - } + (_, extension) => extension + "#" ); const position = { start: startPosition, @@ -325,16 +317,14 @@ function extractAsciiDocLinks(filePath, options) { }; internalRefs.set(newReference, linkNode); } - } + }); return; } if (line.match(/link:[^\[]+\[[^\]]*\]/g)) { - let extractLink = line.match(/link:[^\[]+\[/g); - for (let i = 0; i < extractLink.length; i++) { - let newReference = extractLink[i]; - newReference = newReference.replace("link:", ""); - newReference = newReference.replace("[", ""); - const matchIndex = line.indexOf(extractLink[i]); // Get the index of the match + let extractLink = line.match(/link:[^\[]+\[[^\]]*\]/g); + extractLink.forEach((link) => { + let newReference = link.replace(/^link:|\[/g, ""); + const matchIndex = line.indexOf(link); // Get the index of the match const startColumn = matchIndex + 5; // Add 5 to account for the "link:" characters const endColumn = startColumn + newReference.length; const startPosition = { @@ -347,29 +337,31 @@ function extractAsciiDocLinks(filePath, options) { column: endColumn, offset: matchIndex + newReference.length, }; - if (newReference.match(/^(http|https):\/\//g)) { + if (newReference.match(/^(https?:\/\/|ftp|irc|mailto):\/\//g)) { const position = { start: startPosition, end: endPosition, }; const linkNode = { - type: "link", + type: newReference.startsWith("http") ? "link" : "external-ref", title: null, url: newReference, children: [], position, }; - const existingLink = links.find( - (link) => - link.url === linkNode.url && - link.position.start.line === linkNode.position.start.line && - link.position.start.column === linkNode.position.start.column - ); - if (!existingLink) { - links.push(linkNode); // Add link to the array only if it's not already there + if (linkNode.type === "link") { + const existingLink = links.find( + (link) => + link.url === linkNode.url && + link.position.start.line === linkNode.position.start.line && + link.position.start.column === linkNode.position.start.column + ); + if (!existingLink) { + links.push(linkNode); // Add link to the array only if it's not already there + } + } else { + externalRefs.set(newReference, linkNode); } - } else if (newReference.match(/^(ftp|irc|mailto):\/\//g)) { - // we currently don't handle these } else { newReference = newReference.replace(/(\.html?5?)#.*/, "$1"); const position = { @@ -385,7 +377,7 @@ function extractAsciiDocLinks(filePath, options) { }; externalRefs.set(newReference, linkNode); } - } + }); return; } if (