
Commit cebea21

Authored on Jan 3, 2021
feat: introduce retry-after detection (#221)
This introduces a --retry flag which, when passed, automatically retries requests that come back with an HTTP 429 and a retry-after header. I tested this against GitHub, and it appears to work as expected.
1 parent 936af89 commit cebea21
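
For reference, the new flag is passed like any other CLI argument (a minimal sketch; the URL is just a placeholder for any site that rate limits with 429 + retry-after):

    $ npx linkinator https://example.com --retry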

12 files changed: +582 −140 lines
 

‎README.md

+5
@@ -64,6 +64,10 @@ $ linkinator LOCATIONS [ --arguments ]
     --recurse, -r
         Recursively follow links on the same root domain.

+    --retry,
+        Automatically retry requests that return HTTP 429 responses and include
+        a 'retry-after' header. Defaults to false.
+
     --server-root
         When scanning a locally directory, customize the location on disk
         where the server is started. Defaults to the path passed in [LOCATION].
@@ -183,6 +187,7 @@ Asynchronous method that runs a site wide scan. Options come in the form of an o
 - `concurrency` (number) - The number of connections to make simultaneously. Defaults to 100.
 - `port` (number) - When the `path` is provided as a local path on disk, the `port` on which to start the temporary web server. Defaults to a random high range order port.
 - `recurse` (boolean) - By default, all scans are shallow. Only the top level links on the requested page will be scanned. By setting `recurse` to `true`, the crawler will follow all links on the page, and continue scanning links **on the same domain** for as long as it can go. Results are cached, so no worries about loops.
+- `retry` (boolean|RetryConfig) - Automatically retry requests that respond with an HTTP 429, and include a `retry-after` header. The `RetryConfig` option is a placeholder for fine-grained controls to be implemented at a later time, and is only included here to signal forward-compatibility.
 - `serverRoot` (string) - When scanning a locally directory, customize the location on disk
   where the server is started. Defaults to the path passed in `path`.
 - `timeout` (number) - By default, requests made by linkinator do not time out (or follow the settings of the OS). This option (in milliseconds) will fail requests after the configured amount of time.
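
Taken together with the existing API, wiring up the new option and the new 'retry' event looks roughly like this (a minimal sketch based on the README and the tests in this commit; the target URL is a placeholder):

    import {LinkChecker, RetryInfo} from 'linkinator';

    async function main() {
      const checker = new LinkChecker();
      // Fired whenever a 429 with a retry-after header causes a request to be rescheduled.
      checker.on('retry', (info: RetryInfo) => {
        console.log(`retrying ${info.url} in ${info.secondsUntilRetry}s (status ${info.status})`);
      });
      const results = await checker.check({
        path: 'https://example.com',
        recurse: true,
        retry: true, // new in this commit; defaults to false
      });
      console.log(results.passed ? 'all links passed' : 'some links are broken');
    }

    main();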

‎src/cli.ts

+16 −1

@@ -3,7 +3,13 @@
 import * as meow from 'meow';
 import * as updateNotifier from 'update-notifier';
 import chalk = require('chalk');
-import {LinkChecker, LinkState, LinkResult, CheckOptions} from './index';
+import {
+  LinkChecker,
+  LinkState,
+  LinkResult,
+  CheckOptions,
+  RetryInfo,
+} from './index';
 import {promisify} from 'util';
 import {Flags, getConfig} from './config';
 import {Format, Logger, LogLevel} from './logger';
@@ -51,6 +57,10 @@ const cli = meow(
     --recurse, -r
         Recursively follow links on the same root domain.

+    --retry,
+        Automatically retry requests that return HTTP 429 responses and include
+        a 'retry-after' header. Defaults to false.
+
     --server-root
         When scanning a locally directory, customize the location on disk
         where the server is started. Defaults to the path passed in [LOCATION].
@@ -85,6 +95,7 @@ const cli = meow(
      serverRoot: {type: 'string'},
      verbosity: {type: 'string'},
      directoryListing: {type: 'boolean'},
+     retry: {type: 'boolean'},
    },
    booleanDefault: undefined,
  }
@@ -107,6 +118,9 @@ async function main() {
   logger.error(`🏊‍♂️ crawling ${cli.input}`);

   const checker = new LinkChecker();
+  checker.on('retry', (info: RetryInfo) => {
+    logger.warn(`Retrying: ${info.url} in ${info.secondsUntilRetry} seconds.`);
+  });
   checker.on('link', (link: LinkResult) => {
     let state = '';
     switch (link.state) {
@@ -132,6 +146,7 @@ async function main() {
     concurrency: Number(flags.concurrency),
     serverRoot: flags.serverRoot,
     directoryListing: flags.directoryListing,
+    retry: flags.retry,
   };
   if (flags.skip) {
     if (typeof flags.skip === 'string') {

‎src/config.ts

+1
@@ -14,6 +14,7 @@ export interface Flags {
   markdown?: boolean;
   serverRoot?: string;
   directoryListing?: boolean;
+  retry?: boolean;
 }

 export async function getConfig(flags: Flags) {

‎src/index.ts

+106 −139

@@ -1,39 +1,28 @@
 import {EventEmitter} from 'events';
 import {URL} from 'url';
 import * as http from 'http';
-import * as fs from 'fs';
-import * as util from 'util';
-import * as path from 'path';

 import {request, GaxiosResponse} from 'gaxios';
-import PQueue, {DefaultAddOptions} from 'p-queue';
-import PriorityQueue from 'p-queue/dist/priority-queue';
-import * as globby from 'glob';

+import {Queue} from './queue';
 import {getLinks} from './links';
 import {startWebServer} from './server';
+import {CheckOptions, processOptions} from './options';

-const stat = util.promisify(fs.stat);
-const glob = util.promisify(globby);
-
-export interface CheckOptions {
-  concurrency?: number;
-  port?: number;
-  path: string | string[];
-  recurse?: boolean;
-  timeout?: number;
-  markdown?: boolean;
-  linksToSkip?: string[] | ((link: string) => Promise<boolean>);
-  serverRoot?: string;
-  directoryListing?: boolean;
-}
+export {CheckOptions};

 export enum LinkState {
   OK = 'OK',
   BROKEN = 'BROKEN',
   SKIPPED = 'SKIPPED',
 }

+export interface RetryInfo {
+  url: string;
+  secondsUntilRetry: number;
+  status: number;
+}
+
 export interface LinkResult {
   url: string;
   status?: number;
@@ -53,9 +42,11 @@ interface CrawlOptions {
   crawl: boolean;
   results: LinkResult[];
   cache: Set<string>;
+  delayCache: Map<string, number>;
   checkOptions: CheckOptions;
-  queue: PQueue<PriorityQueue, DefaultAddOptions>;
+  queue: Queue;
   rootPath: string;
+  retry: boolean;
 }

 // Spoof a normal looking User-Agent to keep the servers happy
@@ -64,6 +55,12 @@ export const headers = {
     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
 };

+export declare interface LinkChecker {
+  on(event: 'link', listener: (result: LinkResult) => void): this;
+  on(event: 'pagestart', listener: (link: string) => void): this;
+  on(event: 'retry', listener: (details: RetryInfo) => void): this;
+}
+
 /**
  * Instance class used to perform a crawl job.
  */
@@ -74,7 +71,7 @@ export class LinkChecker extends EventEmitter {
    * @param options Options to use while checking for 404s
    */
   async check(opts: CheckOptions) {
-    const options = await this.processOptions(opts);
+    const options = await processOptions(opts);
     if (!Array.isArray(options.path)) {
       options.path = [options.path];
     }
@@ -101,12 +98,13 @@ export class LinkChecker extends EventEmitter {
       console.log(options);
     }

-    const queue = new PQueue({
+    const queue = new Queue({
       concurrency: options.concurrency || 100,
     });

     const results = new Array<LinkResult>();
     const initCache: Set<string> = new Set();
+    const delayCache: Map<string, number> = new Map();

     for (const path of options.path) {
       const url = new URL(path);
@@ -118,8 +116,10 @@ export class LinkChecker extends EventEmitter {
           checkOptions: options,
           results,
           cache: initCache,
+          delayCache,
           queue,
           rootPath: path,
+          retry: !!opts.retry,
         });
       });
     }
@@ -135,121 +135,6 @@ export class LinkChecker extends EventEmitter {
     return result;
   }

-  /**
-   * Validate the provided flags all work with each other.
-   * @param options CheckOptions passed in from the CLI (or API)
-   */
-  private async processOptions(opts: CheckOptions): Promise<CheckOptions> {
-    const options = Object.assign({}, opts);
-
-    // ensure at least one path is provided
-    if (options.path.length === 0) {
-      throw new Error('At least one path must be provided');
-    }
-
-    // normalize options.path to an array of strings
-    if (!Array.isArray(options.path)) {
-      options.path = [options.path];
-    }
-
-    // disable directory listings by default
-    if (options.directoryListing === undefined) {
-      options.directoryListing = false;
-    }
-
-    // Ensure we do not mix http:// and file system paths. The paths passed in
-    // must all be filesystem paths, or HTTP paths.
-    let isUrlType: boolean | undefined = undefined;
-    for (const path of options.path) {
-      const innerIsUrlType = path.startsWith('http');
-      if (isUrlType === undefined) {
-        isUrlType = innerIsUrlType;
-      } else if (innerIsUrlType !== isUrlType) {
-        throw new Error(
-          'Paths cannot be mixed between HTTP and local filesystem paths.'
-        );
-      }
-    }
-
-    // if there is a server root, make sure there are no HTTP paths
-    if (options.serverRoot && isUrlType) {
-      throw new Error(
-        "'serverRoot' cannot be defined when the 'path' points to an HTTP endpoint."
-      );
-    }
-
-    if (options.serverRoot) {
-      options.serverRoot = path.normalize(options.serverRoot);
-    }
-
-    // expand globs into paths
-    if (!isUrlType) {
-      const paths: string[] = [];
-      for (const filePath of options.path) {
-        // The glob path provided is relative to the serverRoot. For example,
-        // if the serverRoot is test/fixtures/nested, and the glob is "*/*.html",
-        // The glob needs to be calculated from the serverRoot directory.
-        const fullPath = options.serverRoot
-          ? path.join(options.serverRoot, filePath)
-          : filePath;
-        const expandedPaths = await glob(fullPath);
-        if (expandedPaths.length === 0) {
-          throw new Error(
-            `The provided glob "${filePath}" returned 0 results. The current working directory is "${process.cwd()}".`
-          );
-        }
-        // After resolving the globs, the paths need to be returned to their
-        // original form, without the serverRoot included in the path.
-        for (let p of expandedPaths) {
-          p = path.normalize(p);
-          if (options.serverRoot) {
-            const contractedPath = p
-              .split(path.sep)
-              .slice(options.serverRoot.split(path.sep).length)
-              .join(path.sep);
-            paths.push(contractedPath);
-          } else {
-            paths.push(p);
-          }
-        }
-      }
-      options.path = paths;
-    }
-
-    // enable markdown if someone passes a flag/glob right at it
-    if (options.markdown === undefined) {
-      for (const p of options.path) {
-        if (path.extname(p).toLowerCase() === '.md') {
-          options.markdown = true;
-        }
-      }
-    }
-
-    // Figure out which directory should be used as the root for the web server,
-    // and how that impacts the path to the file for the first request.
-    if (!options.serverRoot && !isUrlType) {
-      // if the serverRoot wasn't defined, and there are multiple paths, just
-      // use process.cwd().
-      if (options.path.length > 1) {
-        options.serverRoot = process.cwd();
-      } else {
-        // if there's a single path, try to be smart and figure it out
-        const s = await stat(options.path[0]);
-        options.serverRoot = options.path[0];
-        if (s.isFile()) {
-          const pathParts = options.path[0].split(path.sep);
-          options.path = [path.sep + pathParts[pathParts.length - 1]];
-          options.serverRoot =
-            pathParts.slice(0, pathParts.length - 1).join(path.sep) || '.';
-        } else {
-          options.serverRoot = options.path[0];
-          options.path = '/';
-        }
-      }
-    }
-    return options;
-  }
-
   /**
    * Crawl a given url with the provided options.
    * @pram opts List of options used to do the crawl
@@ -260,7 +145,7 @@ export class LinkChecker extends EventEmitter {
     // explicitly skip non-http[s] links before making the request
     const proto = opts.url.protocol;
     if (proto !== 'http:' && proto !== 'https:') {
-      const r = {
+      const r: LinkResult = {
         url: opts.url.href,
         status: 0,
         state: LinkState.SKIPPED,
@@ -306,6 +191,22 @@ export class LinkChecker extends EventEmitter {
       }
     }

+    // Check if this host has been marked for delay due to 429
+    if (opts.delayCache.has(opts.url.host)) {
+      const timeout = opts.delayCache.get(opts.url.host)!;
+      if (timeout > Date.now()) {
+        opts.queue.add(
+          async () => {
+            await this.crawl(opts);
+          },
+          {
+            delay: timeout - Date.now(),
+          }
+        );
+        return;
+      }
+    }
+
     // Perform a HEAD or GET request based on the need to crawl
     let status = 0;
     let state = LinkState.BROKEN;
@@ -322,6 +223,9 @@ export class LinkChecker extends EventEmitter {
         validateStatus: () => true,
         timeout: opts.checkOptions.timeout,
       });
+      if (this.shouldRetryAfter(res, opts)) {
+        return;
+      }

       // If we got an HTTP 405, the server may not like HEAD. GET instead!
       if (res.status === 405) {
@@ -333,6 +237,9 @@ export class LinkChecker extends EventEmitter {
           validateStatus: () => true,
           timeout: opts.checkOptions.timeout,
         });
+        if (this.shouldRetryAfter(res, opts)) {
+          return;
+        }
       }
     } catch (err) {
       // request failure: invalid domain name, etc.
@@ -355,6 +262,9 @@ export class LinkChecker extends EventEmitter {
           headers,
           timeout: opts.checkOptions.timeout,
         });
+        if (this.shouldRetryAfter(res, opts)) {
+          return;
+        }
       }
     } catch (ex) {
       failures.push(ex);
@@ -425,17 +335,74 @@ export class LinkChecker extends EventEmitter {
             url: result.url!,
             crawl,
             cache: opts.cache,
+            delayCache: opts.delayCache,
             results: opts.results,
             checkOptions: opts.checkOptions,
             queue: opts.queue,
             parent: opts.url.href,
             rootPath: opts.rootPath,
+            retry: opts.retry,
           });
         });
       }
     }
   }
+  /**
+   * Check the incoming response for a `retry-after` header. If present,
+   * and if the status was an HTTP 429, calculate the date at which this
+   * request should be retried. Ensure the delayCache knows that we're
+   * going to wait on requests for this entire host.
+   * @param res GaxiosResponse returned from the request
+   * @param opts CrawlOptions used during this request
+   */
+  shouldRetryAfter(res: GaxiosResponse, opts: CrawlOptions): boolean {
+    if (!opts.retry) {
+      return false;
+    }
+
+    const retryAfterRaw = res.headers['retry-after'];
+    if (res.status !== 429 || !retryAfterRaw) {
+      return false;
+    }
+
+    // The `retry-after` header can come in either <seconds> or
+    // A specific date to go check.
+    let retryAfter = Number(retryAfterRaw) * 1000 + Date.now();
+    if (isNaN(retryAfter)) {
+      retryAfter = Date.parse(retryAfterRaw);
+      if (isNaN(retryAfter)) {
+        return false;
+      }
+    }

+    // check to see if there is already a request to wait for this host
+    if (opts.delayCache.has(opts.url.host)) {
+      // use whichever time is higher in the cache
+      const currentTimeout = opts.delayCache.get(opts.url.host)!;
+      if (retryAfter > currentTimeout) {
+        opts.delayCache.set(opts.url.host, retryAfter);
+      }
+    } else {
+      opts.delayCache.set(opts.url.host, retryAfter);
+    }
+
+    opts.queue.add(
+      async () => {
+        await this.crawl(opts);
+      },
+      {
+        delay: retryAfter - Date.now(),
+      }
+    );
+    const retryDetails: RetryInfo = {
+      url: opts.url.href,
+      status: res.status,
+      secondsUntilRetry: Math.round((retryAfter - Date.now()) / 1000),
+    };
+    this.emit('retry', retryDetails);
+    return true;
+  }
 }

 /**
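
To make the new shouldRetryAfter logic concrete: a retry-after value that parses as a number is treated as seconds from now, anything else is tried as an HTTP date, and an unparseable value falls through to the normal broken-link handling. A small standalone sketch of that rule (the helper name is illustrative, not part of the library):

    // Hypothetical helper mirroring the parsing in shouldRetryAfter.
    // Returns the epoch-millisecond time at which to retry, or undefined
    // when the header cannot be interpreted.
    function parseRetryAfter(retryAfterRaw: string, now = Date.now()): number | undefined {
      let retryAt = Number(retryAfterRaw) * 1000 + now; // 'retry-after: 10' -> now + 10s
      if (isNaN(retryAt)) {
        retryAt = Date.parse(retryAfterRaw);            // 'retry-after: Wed, 21 Oct 2015 07:28:00 GMT'
        if (isNaN(retryAt)) {
          return undefined;                             // e.g. 'totally-not-valid'
        }
      }
      return retryAt;
    }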

‎src/options.ts

+137
@@ -0,0 +1,137 @@ (new file)
import * as fs from 'fs';
import * as util from 'util';
import * as path from 'path';
import * as globby from 'glob';

const stat = util.promisify(fs.stat);
const glob = util.promisify(globby);

export interface CheckOptions {
  concurrency?: number;
  port?: number;
  path: string | string[];
  recurse?: boolean;
  timeout?: number;
  markdown?: boolean;
  linksToSkip?: string[] | ((link: string) => Promise<boolean>);
  serverRoot?: string;
  directoryListing?: boolean;
  retry?: boolean;
}

/**
 * Validate the provided flags all work with each other.
 * @param options CheckOptions passed in from the CLI (or API)
 */
export async function processOptions(
  opts: CheckOptions
): Promise<CheckOptions> {
  const options = Object.assign({}, opts);

  // ensure at least one path is provided
  if (options.path.length === 0) {
    throw new Error('At least one path must be provided');
  }

  // normalize options.path to an array of strings
  if (!Array.isArray(options.path)) {
    options.path = [options.path];
  }

  // disable directory listings by default
  if (options.directoryListing === undefined) {
    options.directoryListing = false;
  }

  // Ensure we do not mix http:// and file system paths. The paths passed in
  // must all be filesystem paths, or HTTP paths.
  let isUrlType: boolean | undefined = undefined;
  for (const path of options.path) {
    const innerIsUrlType = path.startsWith('http');
    if (isUrlType === undefined) {
      isUrlType = innerIsUrlType;
    } else if (innerIsUrlType !== isUrlType) {
      throw new Error(
        'Paths cannot be mixed between HTTP and local filesystem paths.'
      );
    }
  }

  // if there is a server root, make sure there are no HTTP paths
  if (options.serverRoot && isUrlType) {
    throw new Error(
      "'serverRoot' cannot be defined when the 'path' points to an HTTP endpoint."
    );
  }

  if (options.serverRoot) {
    options.serverRoot = path.normalize(options.serverRoot);
  }

  // expand globs into paths
  if (!isUrlType) {
    const paths: string[] = [];
    for (const filePath of options.path) {
      // The glob path provided is relative to the serverRoot. For example,
      // if the serverRoot is test/fixtures/nested, and the glob is "*/*.html",
      // The glob needs to be calculated from the serverRoot directory.
      const fullPath = options.serverRoot
        ? path.join(options.serverRoot, filePath)
        : filePath;
      const expandedPaths = await glob(fullPath);
      if (expandedPaths.length === 0) {
        throw new Error(
          `The provided glob "${filePath}" returned 0 results. The current working directory is "${process.cwd()}".`
        );
      }
      // After resolving the globs, the paths need to be returned to their
      // original form, without the serverRoot included in the path.
      for (let p of expandedPaths) {
        p = path.normalize(p);
        if (options.serverRoot) {
          const contractedPath = p
            .split(path.sep)
            .slice(options.serverRoot.split(path.sep).length)
            .join(path.sep);
          paths.push(contractedPath);
        } else {
          paths.push(p);
        }
      }
    }
    options.path = paths;
  }

  // enable markdown if someone passes a flag/glob right at it
  if (options.markdown === undefined) {
    for (const p of options.path) {
      if (path.extname(p).toLowerCase() === '.md') {
        options.markdown = true;
      }
    }
  }

  // Figure out which directory should be used as the root for the web server,
  // and how that impacts the path to the file for the first request.
  if (!options.serverRoot && !isUrlType) {
    // if the serverRoot wasn't defined, and there are multiple paths, just
    // use process.cwd().
    if (options.path.length > 1) {
      options.serverRoot = process.cwd();
    } else {
      // if there's a single path, try to be smart and figure it out
      const s = await stat(options.path[0]);
      options.serverRoot = options.path[0];
      if (s.isFile()) {
        const pathParts = options.path[0].split(path.sep);
        options.path = [path.sep + pathParts[pathParts.length - 1]];
        options.serverRoot =
          pathParts.slice(0, pathParts.length - 1).join(path.sep) || '.';
      } else {
        options.serverRoot = options.path[0];
        options.path = '/';
      }
    }
  }
  return options;
}

‎src/queue.ts

+52
@@ -0,0 +1,52 @@ (new file)
import PQueue from 'p-queue';

export interface QueueOptions {
  concurrency?: number;
}

export interface QueueItemOptions {
  delay?: number;
}

export type AsyncFunction = () => Promise<void>;

export class Queue {
  private q: PQueue;
  private activeTimers = 0;

  constructor(options: QueueOptions) {
    this.q = new PQueue({
      concurrency: options.concurrency,
    });
  }

  add(fn: AsyncFunction, options?: QueueItemOptions) {
    if (options?.delay) {
      setTimeout(() => {
        this.q.add(fn);
        this.activeTimers--;
      }, options.delay);
      this.activeTimers++;
    } else {
      this.q.add(fn);
    }
  }

  async onIdle() {
    await this.q.onIdle();
    await new Promise<void>(resolve => {
      if (this.activeTimers === 0) {
        resolve();
        return;
      }
      const timer = setInterval(async () => {
        if (this.activeTimers === 0 && this.q.size === 0) {
          await this.q.onIdle();
          clearInterval(timer);
          resolve();
          return;
        }
      }, 500);
    });
  }
}
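
This wrapper exists so that delayed retries count as outstanding work: onIdle() settles only once both queued tasks and pending timers have drained. A minimal usage sketch (the tasks and delay value here are illustrative):

    import {Queue} from './queue';

    async function demo() {
      const queue = new Queue({concurrency: 2});
      // Runs as soon as a slot is free.
      queue.add(async () => console.log('immediate task'));
      // Deferred by 1500 ms, e.g. a request waiting out a retry-after window.
      queue.add(async () => console.log('delayed task'), {delay: 1500});
      // Resolves only after both tasks (and the timer) have completed.
      await queue.onIdle();
      console.log('all work finished');
    }

    demo();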

‎test/fixtures/retry/index.html

+7
@@ -0,0 +1,7 @@ (new file)
<html>
  <body>
    <a href="http://fake.local/1">linky</a>
    <a href="http://fake.local/3">linky</a>
    <a href="subpage.html">subpage!</a>
  </body>
</html>

‎test/fixtures/retry/subpage.html

+5
@@ -0,0 +1,5 @@ (new file)
<html>
  <body>
    <a href="http://fake.local/2">linky</a>
  </body>
</html>

‎test/fixtures/retryCLI/index.html

+5
@@ -0,0 +1,5 @@ (new file)
<html>
  <body>
    <a href="http://localhost:3333">linky</a>
  </body>
</html>

‎test/zcli.ts ‎test/test.cli.ts

+46
@@ -1,13 +1,28 @@
 import {describe, it} from 'mocha';
 import * as execa from 'execa';
 import {assert} from 'chai';
+import * as http from 'http';
+import * as util from 'util';
+import enableDestroy = require('server-destroy');
 import {LinkResult, LinkState} from '../src/index';

 describe('cli', () => {
+  let server: http.Server;
+
+  if (process.env.LINKINATOR_SKIP_CLI_TESTS) {
+    return;
+  }
+
   before(async () => {
     await execa('npm', ['link']);
   });

+  afterEach(async () => {
+    if (server) {
+      await util.promisify(server.destroy)();
+    }
+  });
+
   it('should show output for failures', async () => {
     const res = await execa('npx', ['linkinator', 'test/fixtures/basic'], {
       reject: false,
@@ -194,4 +209,35 @@ describe('cli', () => {
     ]);
     assert.strictEqual(res.exitCode, 0);
   });
+
+  it('should warn on retries', async () => {
+    // start a web server to return the 429
+    let requestCount = 0;
+    let firstRequestTime: number;
+    const port = 3333;
+    const delayMillis = 1000;
+    server = http.createServer((_, res) => {
+      if (requestCount === 0) {
+        res.writeHead(429, {
+          'retry-after': 1,
+        });
+        requestCount++;
+        firstRequestTime = Date.now();
+      } else {
+        assert.isAtLeast(Date.now(), firstRequestTime + delayMillis);
+        res.writeHead(200);
+      }
+      res.end();
+    });
+    enableDestroy(server);
+    await new Promise<void>(r => server.listen(port, r));
+
+    const res = await execa('npx', [
+      'linkinator',
+      '--retry',
+      'test/fixtures/retryCLI',
+    ]);
+    assert.strictEqual(res.exitCode, 0);
+    assert.include(res.stdout, `Retrying: http://localhost:${port}`);
+  });
 });

‎test/test.ts ‎test/test.index.ts

File renamed without changes (test/test.ts → test/test.index.ts).

‎test/test.retry.ts

+202
@@ -0,0 +1,202 @@ (new file)
import {assert} from 'chai';
import * as nock from 'nock';
import * as sinon from 'sinon';
import {describe, it, afterEach} from 'mocha';

import {check, LinkChecker} from '../src';

nock.disableNetConnect();
nock.enableNetConnect('localhost');

describe('retries', () => {
  afterEach(() => {
    sinon.restore();
    nock.cleanAll();
  });

  it('should handle 429s with invalid retry-after headers', async () => {
    const scope = nock('http://fake.local').get('/').reply(429, undefined, {
      'retry-after': 'totally-not-valid',
    });
    const results = await check({
      path: 'test/fixtures/basic',
      retry: true,
    });
    assert.ok(!results.passed);
    scope.done();
  });

  it('should retry 429s with second based header', async () => {
    const scope = nock('http://fake.local')
      .get('/')
      .reply(429, undefined, {
        'retry-after': '10',
      })
      .get('/')
      .reply(200);

    const {promise, resolve} = invertedPromise();
    const checker = new LinkChecker().on('retry', resolve);
    const clock = sinon.useFakeTimers();
    const checkPromise = checker.check({
      path: 'test/fixtures/basic',
      retry: true,
    });
    await promise;
    await clock.tickAsync(10_000);
    const results = await checkPromise;
    assert.ok(results.passed);
    scope.done();
  });

  it('should retry 429s after failed HEAD', async () => {
    const scope = nock('http://fake.local')
      .head('/')
      .reply(405)
      .get('/')
      .reply(429, undefined, {
        'retry-after': '10',
      })
      .get('/')
      .reply(200);

    const {promise, resolve} = invertedPromise();
    const checker = new LinkChecker().on('retry', resolve);
    const clock = sinon.useFakeTimers();
    const checkPromise = checker.check({
      path: 'test/fixtures/basic',
      retry: true,
    });
    await promise;
    await clock.tickAsync(10000);
    const results = await checkPromise;
    assert.ok(results.passed);
    scope.done();
  });

  it('should retry 429s with date based header', async () => {
    const scope = nock('http://fake.local')
      .get('/')
      .reply(429, undefined, {
        'retry-after': '1970-01-01T00:00:10.000Z',
      })
      .get('/')
      .reply(200);

    const {promise, resolve} = invertedPromise();
    const checker = new LinkChecker().on('retry', resolve);
    const clock = sinon.useFakeTimers();
    const checkPromise = checker.check({
      path: 'test/fixtures/basic',
      retry: true,
    });
    await promise;
    await clock.tickAsync(10000);
    const results = await checkPromise;
    assert.ok(results.passed);
    scope.done();
  });

  it('should detect requests to wait on the same host', async () => {
    const scope = nock('http://fake.local')
      .get('/1')
      .reply(429, undefined, {
        'retry-after': '3',
      })
      .get('/1', () => {
        assert.isAtLeast(Date.now(), 3000);
        return true;
      })
      .reply(200)
      .get('/2', () => {
        assert.isAtLeast(Date.now(), 3000);
        return true;
      })
      .reply(200)
      .get('/3')
      .reply(429, undefined, {
        'retry-after': '3',
      })
      .get('/3', () => {
        assert.isAtLeast(Date.now(), 3000);
        return true;
      })
      .reply(200);

    const {promise, resolve} = invertedPromise();
    const checker = new LinkChecker().on('retry', resolve);
    const clock = sinon.useFakeTimers();
    const checkPromise = checker.check({
      path: 'test/fixtures/retry',
      recurse: true,
      retry: true,
    });
    await promise;
    await clock.tickAsync(3000);
    const results = await checkPromise;
    assert.ok(results.passed);
    scope.done();
  });

  it('should increase timeout for followup requests to a host', async () => {
    const scope = nock('http://fake.local')
      .get('/1')
      .reply(429, undefined, {
        'retry-after': '3',
      })
      .get('/1', () => {
        // even though the header said to wait 3 seconds, we are checking to
        // make sure the /3 route reset it to 9 seconds here. This is common
        // when a flood of requests come through and the retry-after gets
        // extended.
        assert.isAtLeast(Date.now(), 9000);
        return true;
      })
      .reply(200)
      .get('/2', () => {
        assert.isAtLeast(Date.now(), 9000);
        return true;
      })
      .reply(200)
      .get('/3')
      .reply(429, undefined, {
        'retry-after': '9',
      })
      .get('/3', () => {
        assert.isAtLeast(Date.now(), 9000);
        return true;
      })
      .reply(200);

    const {promise: p1, resolve: r1} = invertedPromise();
    const {promise: p2, resolve: r2} = invertedPromise();
    const checker = new LinkChecker().on('retry', info => {
      if (info.url === 'http://fake.local/1') {
        r1();
      } else if (info.url === 'http://fake.local/3') {
        r2();
      }
    });
    const clock = sinon.useFakeTimers();
    const checkPromise = checker.check({
      path: 'test/fixtures/retry',
      recurse: true,
      retry: true,
    });
    await Promise.all([p1, p2]);
    await clock.tickAsync(9000);
    const results = await checkPromise;
    assert.ok(results.passed);
    scope.done();
  });

  function invertedPromise() {
    let resolve!: () => void;
    let reject!: (err: Error) => void;
    const promise = new Promise<void>((innerResolve, innerReject) => {
      resolve = innerResolve;
      reject = innerReject;
    });
    return {promise, resolve, reject};
  }
});
