Skip to content

Commit

Permalink
Add external link checker
Browse files Browse the repository at this point in the history
  • Loading branch information
Eric-Arellano committed Feb 28, 2024
1 parent 8158062 commit 6b079d1
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 72 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/api-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ on:
- "docs/api/qiskit-ibm-provider/**/*"
- "docs/api/qiskit-ibm-runtime/**/*"
- "public/api/**/*"
- "scripts/checkLinks.ts"
- "scripts/checkInternalLinks.ts"
- "scripts/lib/links/ignores.ts"

jobs:
Expand All @@ -36,7 +36,7 @@ jobs:
run: npm ci
- name: Check internal links
run: >
npm run check:links --
npm run check-internal-links --
--qiskit-release-notes
--current-apis
--dev-apis
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
- name: Spellcheck
run: npm run check:spelling
- name: Internal link checker
run: npm run check:links
run: npm run check-internal-links
- name: Formatting
run: npm run check:fmt
- name: Typecheck
Expand Down
10 changes: 3 additions & 7 deletions .github/workflows/weekly-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ jobs:
run: npm ci
- name: Check external links
run: >
npm run check:links --
--qiskit-release-notes
--current-apis
--dev-apis
--historical-apis
--skip-broken-historical
--external
npm run check-external-links --
'docs/**/*.{md,mdx,ipynb}'
'!docs/api/qiskit/[0-9]*/*'
28 changes: 18 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,27 +178,35 @@ about it.

## Check for broken links

CI will check for broken links. You can also check locally:
We have two broken link checkers: for internal links and for external links.

```bash
# Only check for internal broken links
npm run check:links
To check internal links:

# Enable the validation of external links
npm run check:links -- --external
```bash
# Only check non-API docs
npm run check-internal-links
# By default, only the non-API docs are checked. You can add any of the
# below arguments to also check API docs.
npm run check:links -- --current-apis --dev-apis --historical-apis --qiskit-release-notes
# You can add any of the below arguments to also check API docs.
npm run check-internal-links -- --current-apis --dev-apis --historical-apis --qiskit-release-notes
# However, `--historical-apis` currently has failing versions, so you may
# want to add `--skip-broken-historical`.
npm run check:links -- --historical-apis --skip-broken-historical
npm run check-internal-links -- --historical-apis --skip-broken-historical
# Or, run all the checks. Although this only checks non-API docs.
npm run check
```

To check external links:

```bash
# Specify the files you want after `--`
npm run check-external-links -- docs/run/index.md
# You can also use globs
npm run check-external-links -- 'docs/run/*'
```

## Check file metadata

Every file needs to have a `title` and `description`. The `lint` job in CI will fail with instructions for any bad file.
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
"author": "Qiskit Development Team",
"license": "Apache-2.0",
"scripts": {
"check": "npm run check:metadata && npm run check:spelling && npm run check:links && npm run check:fmt",
"check": "npm run check:metadata && npm run check:spelling && npm run check-internal-links && npm run check:fmt",
"check:metadata": "node -r esbuild-register scripts/commands/checkMetadata.ts",
"check:links": "node -r esbuild-register scripts/commands/checkLinks.ts",
"check:spelling": "cspell --relative --no-progress docs/**/*.md* docs/api/**/*.md* --config cspell/cSpell.json",
"check:fmt": "prettier --check .",
"check-internal-links": "node -r esbuild-register scripts/commands/checkInternalLinks.ts",
"check-external-links": "node -r esbuild-register scripts/commands/checkExternalLinks.ts",
"check-pages-render": "node -r esbuild-register scripts/commands/checkPagesRender.ts",
"fmt": "prettier --write .",
"test": "jest",
Expand Down
78 changes: 78 additions & 0 deletions scripts/commands/checkExternalLinks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { globby } from "globby";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";

import { ExternalLink } from "../lib/links/ExternalLink";
import { parseFile } from "../lib/links/extractLinks";
import { addLinksToMap } from "../lib/links/FileBatch";

interface Arguments {
[x: string]: unknown;
globs?: string[];
}

const readArgs = (): Arguments => {
return yargs(hideBin(process.argv))
.version(false)
.command("$0 [globs..]", "")
.parseSync();
};

async function main() {
const args = readArgs();
const filePaths = await globby(args.globs || []);

const links = await getLinks(filePaths);

// We use a for loop to reduce the risk of rate limiting.
let allGood = true;
let numLinksChecked = 1;
for (const link of links) {
const result = await link.check();

// This script can be slow, so log progress every 20 links.
if (numLinksChecked % 20 == 0) {
console.log(`Checked ${numLinksChecked} / ${links.length} links`);
}
numLinksChecked++;

if (result === undefined) continue;
console.error(result);
allGood = false;
}

if (!allGood) {
console.error(
`\n\n💔 Some external links appear broken, although it's possible they are flakes. Checked ${links.length} links.`,
);
process.exit(1);
}
console.log(
`\n\n✅ No external links are broken. Checked ${links.length} links.`,
);
}

async function getLinks(filePaths: string[]): Promise<ExternalLink[]> {
const linksToOriginFiles = new Map<string, string[]>();
for (const filePath of filePaths) {
const parsed = await parseFile(filePath);
addLinksToMap(filePath, parsed.externalLinks, linksToOriginFiles);
}
return Array.from(linksToOriginFiles.entries()).map(
([link, originFiles]) => new ExternalLink(link, originFiles),
);
}

main().then(() => process.exit());
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ const SYNTHETIC_FILES: string[] = [

interface Arguments {
[x: string]: unknown;
external: boolean;
currentApis: boolean;
devApis: boolean;
historicalApis: boolean;
Expand All @@ -40,12 +39,6 @@ interface Arguments {
const readArgs = (): Arguments => {
return yargs(hideBin(process.argv))
.version(false)
.option("external", {
type: "boolean",
default: false,
description:
"Should external links be checked? This slows down the script, but is useful to check.",
})
.option("current-apis", {
type: "boolean",
default: false,
Expand Down Expand Up @@ -91,7 +84,7 @@ async function main() {

let allGood = true;
for (const fileBatch of fileBatches) {
const allValidLinks = await fileBatch.check(args.external, otherFiles);
const allValidLinks = await fileBatch.checkInternalLinks(otherFiles);
if (!allValidLinks) {
allGood = false;
}
Expand Down
58 changes: 16 additions & 42 deletions scripts/lib/links/FileBatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import { globby } from "globby";

import { InternalLink, File } from "./InternalLink";
import { ExternalLink } from "./ExternalLink";
import {
ALWAYS_IGNORED_URLS,
FILES_TO_IGNORES,
Expand Down Expand Up @@ -59,75 +58,50 @@ export class FileBatch {
/**
* Load and process the file batch.
*
* Returns a triplet:
* Returns a pair:
* 1. A list of `File` objects with their anchors. These represent
* the universe of valid internal links for this batch, other
* than any additional we may add at check-time, e.g. images.
* 2. A list of InternalLink objects to validate.
* 3. A list of ExternalLink objects to validate.
*/
async load(
loadExternalLinks: boolean,
): Promise<[File[], InternalLink[], ExternalLink[]]> {
async load(): Promise<[File[], InternalLink[]]> {
const files: File[] = [];
for (let filePath of this.toLoad) {
const parsed = await parseFile(filePath);
files.push(new File(filePath, parsed.anchors));
}

const internalLinksToOriginFiles = new Map<string, string[]>();
const externalLinksToOriginFiles = new Map<string, string[]>();
const linksToOriginFiles = new Map<string, string[]>();
for (const filePath of this.toCheck) {
const parsed = await parseFile(filePath);
files.push(new File(filePath, parsed.anchors));
addLinksToMap(filePath, parsed.internalLinks, internalLinksToOriginFiles);
if (loadExternalLinks) {
addLinksToMap(
filePath,
parsed.externalLinks,
externalLinksToOriginFiles,
);
}
addLinksToMap(filePath, parsed.internalLinks, linksToOriginFiles);
}

const internalLinks = Array.from(internalLinksToOriginFiles.entries()).map(
const links = Array.from(linksToOriginFiles.entries()).map(
([link, originFiles]) => new InternalLink(link, originFiles),
);
const externalLinks = Array.from(externalLinksToOriginFiles.entries()).map(
([link, originFiles]) => new ExternalLink(link, originFiles),
);
return [files, internalLinks, externalLinks];
return [files, links];
}

/**
* Check that all links in this file batch are valid.
* Check that all internal links in this file batch are valid.
*
* Logs the results to the console and returns `true` if there were no issues.
*/
async check(
checkExternalLinks: boolean,
otherFiles: File[],
): Promise<boolean> {
console.log(`\n\nChecking links for ${this.description}`);
async checkInternalLinks(otherFiles: File[]): Promise<boolean> {
console.log(`\n\nChecking internal links for ${this.description}`);

const [docsFiles, internalLinkList, externalLinkList] =
await this.load(checkExternalLinks);
const [docsFiles, links] = await this.load();
const existingFiles = docsFiles.concat(otherFiles);

const results = internalLinkList.map((link) => link.check(existingFiles));

// For loop reduces the risk of rate-limiting.
for (let link of externalLinkList) {
results.push(await link.check());
}

let allGood = true;
results
.filter((res) => res !== undefined)
.forEach((linkError) => {
console.error(linkError);
allGood = false;
});
links.forEach((link) => {
const result = link.check(existingFiles);
if (result === undefined) return;
console.error(result);
allGood = false;
});
return allGood;
}
}
Expand Down

0 comments on commit 6b079d1

Please sign in to comment.