Skip to content

Commit

Permalink
feat(core): make broken link checker detect broken anchors - add `onB…
Browse files Browse the repository at this point in the history
…rokenAnchors` config (#9528)

Co-authored-by: sebastienlorber <lorber.sebastien@gmail.com>
  • Loading branch information
OzakIOne and slorber committed Jan 5, 2024
1 parent 6d1897d commit 760a5ae
Show file tree
Hide file tree
Showing 52 changed files with 1,221 additions and 520 deletions.
Expand Up @@ -12,17 +12,17 @@ exports[`transformAsset plugin pathname protocol 1`] = `
exports[`transformAsset plugin transform md links to <a /> 1`] = `
"[asset](https://example.com/asset.pdf)
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default} />
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default} />
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default}>asset</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default}>asset</a>
in paragraph <a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default}>asset</a>
in paragraph <a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default}>asset</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset (2).pdf").default}>asset with URL encoded chars</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset (2).pdf").default}>asset with URL encoded chars</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default + '#page=2'}>asset with hash</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default + '#page=2'}>asset with hash</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default} title="Title">asset</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default} title="Title">asset</a>
[page](noUrl.md)
Expand All @@ -36,24 +36,24 @@ in paragraph <a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file
[assets](/github/!file-loader!/assets.pdf)
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default}>asset</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default}>asset</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static2/asset2.pdf").default}>asset2</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static2/asset2.pdf").default}>asset2</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>staticAsset.pdf</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>staticAsset.pdf</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>@site/static/staticAsset.pdf</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>@site/static/staticAsset.pdf</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default + '#page=2'} title="Title">@site/static/staticAsset.pdf</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default + '#page=2'} title="Title">@site/static/staticAsset.pdf</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>Just staticAsset.pdf</a>, and <a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>**awesome** staticAsset 2.pdf 'It is really "AWESOME"'</a>, but also <a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>coded \`staticAsset 3.pdf\`</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>Just staticAsset.pdf</a>, and <a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>**awesome** staticAsset 2.pdf 'It is really "AWESOME"'</a>, but also <a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAsset.pdf").default}>coded \`staticAsset 3.pdf\`</a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAssetImage.png").default}><img alt="Clickable Docusaurus logo" src={require("!<PROJECT_ROOT>/node_modules/url-loader/dist/cjs.js?limit=10000&name=assets/images/[name]-[contenthash].[ext]&fallback=<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js!./static/staticAssetImage.png").default} width="200" height="200" /></a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/staticAssetImage.png").default}><img alt="Clickable Docusaurus logo" src={require("!<PROJECT_ROOT>/node_modules/url-loader/dist/cjs.js?limit=10000&name=assets/images/[name]-[contenthash].[ext]&fallback=<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js!./static/staticAssetImage.png").default} width="200" height="200" /></a>
<a target="_blank" href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default}><span style={{color: "red"}}>Stylized link to asset file</span></a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./asset.pdf").default}><span style={{color: "red"}}>Stylized link to asset file</span></a>
<a target="_blank" href={require("./data.raw!=!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./data.json").default}>JSON</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("./data.raw!=!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./data.json").default}>JSON</a>
<a target="_blank" href={require("./static/static-json.raw!=!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/static-json.json").default}>static JSON</a>
<a target="_blank" data-noBrokenLinkCheck={true} href={require("./static/static-json.raw!=!<PROJECT_ROOT>/node_modules/file-loader/dist/cjs.js?name=assets/files/[name]-[contenthash].[ext]!./static/static-json.json").default}>static JSON</a>
"
`;
28 changes: 28 additions & 0 deletions packages/docusaurus-mdx-loader/src/remark/transformLinks/index.ts
Expand Up @@ -73,6 +73,34 @@ async function toAssetRequireNode(
value: '_blank',
});

// Assets are not routes, and are required by Webpack already
// They should not trigger the broken link checker
attributes.push({
type: 'mdxJsxAttribute',
name: 'data-noBrokenLinkCheck',
value: {
type: 'mdxJsxAttributeValueExpression',
value: 'true',
data: {
estree: {
type: 'Program',
body: [
{
type: 'ExpressionStatement',
expression: {
type: 'Literal',
value: true,
raw: 'true',
},
},
],
sourceType: 'module',
comments: [],
},
},
},
});

attributes.push({
type: 'mdxJsxAttribute',
name: 'href',
Expand Down
9 changes: 9 additions & 0 deletions packages/docusaurus-module-type-aliases/src/index.d.ts
Expand Up @@ -260,6 +260,15 @@ declare module '@docusaurus/useRouteContext' {
export default function useRouteContext(): PluginRouteContext;
}

// Ambient typings for the `@docusaurus/useBrokenLinks` module alias.
declare module '@docusaurus/useBrokenLinks' {
/**
* Shape of the object returned by the hook: two callbacks, each taking a
* single string and returning nothing.
*/
export type BrokenLinks = {
// NOTE(review): presumably registers a link encountered while rendering so
// the broken link checker can validate it later — confirm in the implementation.
collectLink: (link: string) => void;
// NOTE(review): presumably registers an anchor (element id) that exists on
// the current page — confirm in the implementation.
collectAnchor: (anchor: string) => void;
};

/** Hook returning the `BrokenLinks` collector callbacks. */
export default function useBrokenLinks(): BrokenLinks;
}

// Ambient typings for the `@docusaurus/useIsBrowser` module alias.
declare module '@docusaurus/useIsBrowser' {
/** Hook whose default export returns a boolean. */
export default function useIsBrowser(): boolean;
}
Expand Down
4 changes: 4 additions & 0 deletions packages/docusaurus-theme-classic/src/theme/Heading/index.tsx
Expand Up @@ -10,11 +10,13 @@ import clsx from 'clsx';
import {translate} from '@docusaurus/Translate';
import {useThemeConfig} from '@docusaurus/theme-common';
import Link from '@docusaurus/Link';
import useBrokenLinks from '@docusaurus/useBrokenLinks';
import type {Props} from '@theme/Heading';

import styles from './styles.module.css';

export default function Heading({as: As, id, ...props}: Props): JSX.Element {
const brokenLinks = useBrokenLinks();
const {
navbar: {hideOnScroll},
} = useThemeConfig();
Expand All @@ -23,6 +25,8 @@ export default function Heading({as: As, id, ...props}: Props): JSX.Element {
return <As {...props} id={undefined} />;
}

brokenLinks.collectAnchor(id);

const anchorTitle = translate(
{
id: 'theme.common.headingLinkTitle',
Expand Down
7 changes: 7 additions & 0 deletions packages/docusaurus-types/src/config.d.ts
Expand Up @@ -175,6 +175,13 @@ export type DocusaurusConfig = {
* @default "throw"
*/
onBrokenLinks: ReportingSeverity;
/**
* The behavior of Docusaurus when it detects any broken anchor.
*
* @see https://docusaurus.io/docs/api/docusaurus-config#onBrokenAnchors
* @default "warn"
*/
onBrokenAnchors: ReportingSeverity;
/**
* The behavior of Docusaurus when it detects any broken markdown link.
*
Expand Down
133 changes: 133 additions & 0 deletions packages/docusaurus-utils/src/__tests__/urlUtils.test.ts
Expand Up @@ -18,6 +18,8 @@ import {
buildSshUrl,
buildHttpsUrl,
hasSSHProtocol,
parseURLPath,
serializeURLPath,
} from '../urlUtils';

describe('normalizeUrl', () => {
Expand Down Expand Up @@ -232,6 +234,137 @@ describe('removeTrailingSlash', () => {
});
});

// Test suite for parseURLPath: checks pathname resolution, then query-string
// and hash extraction, then a couple of combined edge cases.
describe('parseURLPath', () => {
it('parse and resolve pathname', () => {
// An empty input is expected to resolve to the root pathname
expect(parseURLPath('')).toEqual({
pathname: '/',
search: undefined,
hash: undefined,
});
expect(parseURLPath('/')).toEqual({
pathname: '/',
search: undefined,
hash: undefined,
});
expect(parseURLPath('/page')).toEqual({
pathname: '/page',
search: undefined,
hash: undefined,
});
expect(parseURLPath('/dir1/page')).toEqual({
pathname: '/dir1/page',
search: undefined,
hash: undefined,
});
// "." and ".." segments are expected to be resolved away
expect(parseURLPath('/dir1/dir2/./../page')).toEqual({
pathname: '/dir1/page',
search: undefined,
hash: undefined,
});
expect(parseURLPath('/dir1/dir2/../..')).toEqual({
pathname: '/',
search: undefined,
hash: undefined,
});
// More ".." segments than directories: expected to stop at the root
expect(parseURLPath('/dir1/dir2/../../..')).toEqual({
pathname: '/',
search: undefined,
hash: undefined,
});
// Relative input is resolved against the path given as second argument
expect(parseURLPath('./dir1/dir2./../page', '/dir3/dir4/page2')).toEqual({
pathname: '/dir3/dir4/dir1/page',
search: undefined,
hash: undefined,
});
});

it('parse query string', () => {
// No "?" at all => search is undefined
expect(parseURLPath('/page')).toEqual({
pathname: '/page',
search: undefined,
hash: undefined,
});
// A bare "?" => search is the empty string (distinct from undefined)
expect(parseURLPath('/page?')).toEqual({
pathname: '/page',
search: '',
hash: undefined,
});
// The leading "?" is not part of the returned search value
expect(parseURLPath('/page?test')).toEqual({
pathname: '/page',
search: 'test',
hash: undefined,
});
expect(parseURLPath('/page?age=42&great=true')).toEqual({
pathname: '/page',
search: 'age=42&great=true',
hash: undefined,
});
});

it('parse hash', () => {
// No "#" at all => hash is undefined
expect(parseURLPath('/page')).toEqual({
pathname: '/page',
search: undefined,
hash: undefined,
});
// A bare "#" => hash is the empty string (distinct from undefined)
expect(parseURLPath('/page#')).toEqual({
pathname: '/page',
search: undefined,
hash: '',
});
// The leading "#" is not part of the returned hash value
expect(parseURLPath('/page#anchor')).toEqual({
pathname: '/page',
search: undefined,
hash: 'anchor',
});
});

it('parse fancy real-world edge cases', () => {
// Both delimiters present but empty
expect(parseURLPath('/page?#')).toEqual({
pathname: '/page',
search: '',
hash: '',
});
// Relative pathname + query string + hash, resolved against a base path
expect(
parseURLPath('dir1/dir2/../page?age=42#anchor', '/dir3/page2'),
).toEqual({
pathname: '/dir3/dir1/page',
search: 'age=42',
hash: 'anchor',
});
});
});

// Test suite for serializeURLPath, exercised as a parse => serialize round trip.
describe('serializeURLPath', () => {
// Helper: parses `input` (optionally relative to `base`), serializes the
// result, and expects `expectedOutput`, or `input` itself when no expected
// output is given (i.e. the round trip must be lossless).
function test(input: string, base?: string, expectedOutput?: string) {
expect(serializeURLPath(parseURLPath(input, base))).toEqual(
expectedOutput ?? input,
);
}

it('works for already resolved paths', () => {
// Each of these must come back unchanged, including empty "?" and "#"
test('/');
test('/dir1/page');
test('/dir1/page?');
test('/dir1/page#');
test('/dir1/page?#');
test('/dir1/page?age=42#anchor');
});

it('works for relative paths', () => {
// Here the output differs from the input because parsing resolves the path
test('', undefined, '/');
test('', '/dir1/dir2/page2', '/dir1/dir2/page2');
test('page', '/dir1/dir2/page2', '/dir1/dir2/page');
test('../page', '/dir1/dir2/page2', '/dir1/page');
test('/dir1/dir2/../page', undefined, '/dir1/page');
test(
'/dir1/dir2/../page?age=42#anchor',
undefined,
'/dir1/page?age=42#anchor',
);
});
});

describe('resolvePathname', () => {
it('works', () => {
// These tests are directly copied from https://github.com/mjackson/resolve-pathname/blob/master/modules/__tests__/resolvePathname-test.js
Expand Down
3 changes: 3 additions & 0 deletions packages/docusaurus-utils/src/index.ts
Expand Up @@ -48,13 +48,16 @@ export {
encodePath,
isValidPathname,
resolvePathname,
parseURLPath,
serializeURLPath,
addLeadingSlash,
addTrailingSlash,
removeTrailingSlash,
hasSSHProtocol,
buildHttpsUrl,
buildSshUrl,
} from './urlUtils';
export type {URLPath} from './urlUtils';
export {
type Tag,
type TagsListItem,
Expand Down
59 changes: 59 additions & 0 deletions packages/docusaurus-utils/src/urlUtils.ts
Expand Up @@ -165,14 +165,73 @@ export function isValidPathname(str: string): boolean {
}
}

/**
 * The (pathname + search + hash) part of a URL, which we name "URLPath".
 *
 * `search` and `hash` are stored without their leading `?` / `#` delimiter.
 * `undefined` means the delimiter was absent, while `''` means the delimiter
 * was present but followed by nothing.
 *
 * See also https://twitter.com/kettanaito/status/1741768992866308120
 */
export type URLPath = {pathname: string; search?: string; hash?: string};

/**
 * Parses a URL path string into its pathname, search and hash components.
 *
 * Note: this function also resolves relative pathnames while parsing!
 *
 * @param urlPath The (possibly relative) path to parse, for example
 * `../page?age=42#anchor`.
 * @param fromPath Optional path that `urlPath` is resolved against. When
 * omitted (or empty), `urlPath` is resolved against the root.
 * @returns The resolved pathname, plus `search` and `hash` stripped of their
 * leading delimiter.
 * @throws Error (with the original error as `cause`) when the `URL`
 * constructor rejects the input.
 */
export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
  function parseURL(url: string, base?: string | URL): URL {
    try {
      // A possible alternative? https://github.com/unjs/ufo#url
      return new URL(url, base ?? 'https://example.com');
    } catch (e) {
      throw new Error(
        `Can't parse URL ${url}${base ? ` with base ${base}` : ''}`,
        {cause: e},
      );
    }
  }

  const base = fromPath ? parseURL(fromPath) : undefined;
  const url = parseURL(urlPath, base);

  const {pathname} = url;

  // The fragment starts at the first "#". Any "?" located after it belongs to
  // the hash, not to the query string, so the "?" lookup below must only
  // consider what comes before the "#".
  const hashIndex = urlPath.indexOf('#');
  const hasHashDelimiter = hashIndex !== -1;
  const beforeHash = hasHashDelimiter ? urlPath.slice(0, hashIndex) : urlPath;

  // Fixes annoying url.search behavior
  // "" => undefined
  // "?" => ""
  // "?param" => "param"
  let search: string | undefined;
  if (url.search) {
    search = url.search.slice(1);
  } else if (beforeHash.includes('?')) {
    search = '';
  }

  // Fixes annoying url.hash behavior
  // "" => undefined
  // "#" => ""
  // "#anchor" => "anchor"
  let hash: string | undefined;
  if (url.hash) {
    hash = url.hash.slice(1);
  } else if (hasHashDelimiter) {
    hash = '';
  }

  return {
    pathname,
    search,
    hash,
  };
}

/**
 * Turns a `URLPath` back into a string: the pathname, followed by `?search`
 * and `#hash` when those parts are defined.
 *
 * An empty-string `search` or `hash` still emits its bare delimiter, only
 * `undefined` omits it.
 */
export function serializeURLPath(urlPath: URLPath): string {
  const {pathname, search: query, hash: fragment} = urlPath;

  let serialized = pathname;
  if (query !== undefined) {
    serialized += `?${query}`;
  }
  if (fragment !== undefined) {
    serialized += `#${fragment}`;
  }
  return serialized;
}

/**
 * Resolve pathnames and fail-fast if resolution fails. Uses standard URL
 * semantics (provided by `resolve-pathname` which is used internally by React
 * router)
 *
 * This is a thin wrapper: both arguments are forwarded unchanged to
 * `resolvePathnameUnsafe` and its result is returned as-is.
 *
 * @param to The pathname to resolve.
 * @param from Optional pathname forwarded as the second argument;
 * NOTE(review): presumably the base that `to` is resolved against — confirm
 * against the `resolve-pathname` documentation.
 * @returns The string returned by `resolvePathnameUnsafe`.
 */
export function resolvePathname(to: string, from?: string): string {
// TODO do we really need resolve-pathname lib anymore?
// possible alternative: decodeURI(parseURLPath(to, from).pathname);
return resolvePathnameUnsafe(to, from);
}

/** Appends a leading slash to `str`, if one doesn't exist. */
export function addLeadingSlash(str: string): string {
return addPrefix(str, '/');
Expand Down

0 comments on commit 760a5ae

Please sign in to comment.