Skip to content

Commit

Permalink
feat(gatsby-source-drupal): Use Got instead of Axios for retries & ca…
Browse files Browse the repository at this point in the history
…ching support + restrict to 20 concurrent requests (#31514)

* feat(gatsby-source-drupal): Use Got instead of Axios for retries & caching support + restrict to 10 concurrent requests

Got is a lot more robust http library so let's use that instead of Axios.

For a very small local test site, this drops sourcing time from ~4s to ~0.4s. Increasing the concurrency to 20 from 10 regresses back to 4s. So somewhere between 10 and 20 concurrent requests, my local setup gets grumpy.

* fix lint

* Update yarn.lock

* Add keep-alive http agent

* Fix tests

* yarn.lock updates

* WIP

* 5 seems to the most stable value for concurrency & fastest for uncached sourcing

* Update yarn.lock

* Update renovate.json

* Remove logging code

* revert some unwanted changes

* Add option for setting concurrency for API requests. Based on @Auspicus' testing, bump default concurrency to 20 (it was 2x faster for his site vs. 5)

* Use Gatsby's cache so it's persisted to disk in between builds
  • Loading branch information
KyleAMathews committed Jun 18, 2021
1 parent f91dd52 commit b0de8f8
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 77 deletions.
24 changes: 12 additions & 12 deletions benchmarks/source-drupal/package.json
Expand Up @@ -16,21 +16,21 @@
},
"dependencies": {
"cross-env": "^7.0.0",
"dotenv": "^8.2.0",
"faker": "^4.1.0",
"gatsby": "^2.19.7",
"gatsby-image": "^2.2.40",
"dotenv": "^9.0.2",
"faker": "^5.5.3",
"gatsby": "^3.5.1",
"gatsby-image": "^3.5.0",
"gatsby-plugin-benchmark-reporting": "*",
"gatsby-plugin-sharp": "^2.4.5",
"gatsby-source-drupal": "^3.3.18",
"gatsby-source-filesystem": "^2.1.48",
"gatsby-transformer-sharp": "^2.3.14",
"gatsby-plugin-sharp": "^3.5.0",
"gatsby-source-drupal": "^4.7.2",
"gatsby-source-filesystem": "^3.5.0",
"gatsby-transformer-sharp": "^3.5.0",
"lodash.kebabcase": "^4.1.1",
"node-fetch": "^2.6.0",
"react": "^16.12.0",
"react-dom": "^16.12.0",
"ts-node": "^8.6.2",
"typescript": "^3.8.3"
"react": "^17.0.2",
"react-dom": "^17.0.2",
"ts-node": "^9.1.1",
"typescript": "^4.2.4"
},
"devDependencies": {
"prettier": "2.0.4"
Expand Down
15 changes: 12 additions & 3 deletions packages/gatsby-source-drupal/README.md
Expand Up @@ -150,7 +150,7 @@ module.exports = {
}
```

## GET Params
## GET Search Params

You can append optional GET request params to the request url using `params` option.

Expand Down Expand Up @@ -214,9 +214,13 @@ module.exports = {
}
```

## Concurrent API Requests

You can use the `concurrentAPIRequests` option to change how many simultaneous API requests are made to the server/service. 20 is the default and seems to be the fastest for most sites.

## Disallowed Link Types

You can use the `disallowedLinkTypes` option to skip link types found in JSON:API documents. By default it skips the `self` and `describedby` links, which do not provide data that can be sourced. You may override the setting to add additional link types to be skipped.
You can use the `disallowedLinkTypes` option to skip link types found in JSON:API documents. By default it skips the `self`, `describedby`, `contact_message--feedback`, and `contact_message--pesonal` links, which do not provide data that can be sourced. You may override the setting to add additional link types to be skipped.

```javascript
// In your gatsby-config.js
Expand All @@ -227,7 +231,12 @@ module.exports = {
options: {
baseUrl: `https://live-contentacms.pantheonsite.io/`,
// skip the action--action resource type.
disallowedLinkTypes: [`self`, `describedby`, `action--action`],
disallowedLinkTypes: [
`self`,
`describedby`,
`contact_message--feedback`,
`contact_message--personal`,
],
},
},
],
Expand Down
10 changes: 4 additions & 6 deletions packages/gatsby-source-drupal/package.json
Expand Up @@ -8,7 +8,9 @@
},
"dependencies": {
"@babel/runtime": "^7.14.0",
"axios": "^0.21.1",
"got": "^11.8.2",
"agentkeepalive": "^4.1.1",
"fastq": "^1.11.0",
"bluebird": "^3.7.2",
"body-parser": "^1.19.0",
"gatsby-source-filesystem": "^3.8.0-next.1",
Expand All @@ -23,11 +25,7 @@
"cross-env": "^7.0.3"
},
"homepage": "https://github.com/gatsbyjs/gatsby/tree/master/packages/gatsby-source-drupal#readme",
"keywords": [
"gatsby",
"gatsby-plugin",
"gatsby-source-plugin"
],
"keywords": ["gatsby", "gatsby-plugin", "gatsby-source-plugin"],
"license": "MIT",
"peerDependencies": {
"gatsby": "^3.0.0-next.0"
Expand Down
30 changes: 14 additions & 16 deletions packages/gatsby-source-drupal/src/__tests__/index.js
@@ -1,16 +1,14 @@
jest.mock(`axios`, () => {
return {
get: jest.fn(path => {
const last = path.split(`/`).pop()
try {
return { data: require(`./fixtures/${last}.json`) }
} catch (e) {
console.log(`Error`, e)
return null
}
}),
}
})
jest.mock(`got`, () =>
jest.fn(path => {
const last = path.split(`/`).pop()
try {
return { body: require(`./fixtures/${last}.json`) }
} catch (e) {
console.log(`Error`, e)
return null
}
})
)

jest.mock(`gatsby-source-filesystem`, () => {
return {
Expand Down Expand Up @@ -506,18 +504,18 @@ describe(`gatsby-source-drupal`, () => {

describe(`Error handling`, () => {
describe(`Does end activities if error is thrown`, () => {
const axios = require(`axios`)
const got = require(`got`)
beforeEach(() => {
nodes = {}
reporter.activityTimer.mockClear()
activity.start.mockClear()
activity.end.mockClear()
axios.get.mockClear()
got.mockClear()
downloadFileSpy.mockClear()
})

it(`during data fetching`, async () => {
axios.get.mockImplementationOnce(() => {
got.mockImplementationOnce(() => {
throw new Error(`data fetching failed`)
})
expect.assertions(5)
Expand Down
104 changes: 71 additions & 33 deletions packages/gatsby-source-drupal/src/gatsby-node.js
@@ -1,6 +1,9 @@
const axios = require(`axios`)
const got = require(`got`)
const _ = require(`lodash`)
const urlJoin = require(`url-join`)
import HttpAgent from "agentkeepalive"

const { HttpsAgent } = HttpAgent

const { setOptions, getOptions } = require(`./plugin-options`)

Expand All @@ -12,6 +15,17 @@ const {
} = require(`./normalize`)
const { handleReferences, handleWebhookUpdate } = require(`./utils`)

const agent = {
http: new HttpAgent(),
https: new HttpsAgent(),
}

async function worker([url, options]) {
return got(url, { agent, ...options })
}

const requestQueue = require(`fastq`).promise(worker, 20)

const asyncPool = require(`tiny-async-pool`)
const bodyParser = require(`body-parser`)

Expand Down Expand Up @@ -53,12 +67,18 @@ exports.sourceNodes = async (
const {
baseUrl,
apiBase = `jsonapi`,
basicAuth,
basicAuth = {},
filters,
headers,
params,
params = {},
concurrentFileRequests = 20,
disallowedLinkTypes = [`self`, `describedby`],
concurrentAPIRequests = 20,
disallowedLinkTypes = [
`self`,
`describedby`,
`contact_message--feedback`,
`contact_message--personal`,
],
skipFileDownloads = false,
fastBuilds = false,
entityReferenceRevisions = [],
Expand All @@ -70,6 +90,9 @@ exports.sourceNodes = async (
} = pluginOptions
const { createNode, setPluginStatus, touchNode } = actions

// Update the concurrency limit from the plugin options
requestQueue.concurrency = concurrentAPIRequests

if (webhookBody && Object.keys(webhookBody).length) {
const changesActivity = reporter.activityTimer(
`loading Drupal content changes`,
Expand Down Expand Up @@ -139,19 +162,22 @@ exports.sourceNodes = async (

try {
// Hit fastbuilds endpoint with the lastFetched date.
const data = await axios.get(
const res = await requestQueue.push([
urlJoin(baseUrl, `gatsby-fastbuilds/sync/`, lastFetched.toString()),
{
auth: basicAuth,
username: basicAuth.username,
password: basicAuth.password,
headers,
params,
}
)
searchParams: params,
responseType: `json`,
cache,
},
])

if (data.data.status === -1) {
if (res.body.status === -1) {
// The incremental data is expired or this is the first fetch.
reporter.info(`Unable to pull incremental data changes from Drupal`)
setPluginStatus({ lastFetched: data.data.timestamp })
setPluginStatus({ lastFetched: res.body.timestamp })
requireFullRebuild = true
} else {
// Touch nodes so they are not garbage collected by Gatsby.
Expand All @@ -162,7 +188,7 @@ exports.sourceNodes = async (
})

// Process sync data from Drupal.
const nodesToSync = data.data.entities
const nodesToSync = res.body.entities
for (const nodeSyncData of nodesToSync) {
if (nodeSyncData.action === `delete`) {
actions.deleteNode(
Expand Down Expand Up @@ -208,7 +234,7 @@ exports.sourceNodes = async (
}
}

setPluginStatus({ lastFetched: data.data.timestamp })
setPluginStatus({ lastFetched: res.body.timestamp })
}
} catch (e) {
gracefullyRethrow(drupalFetchIncrementalActivity, e)
Expand All @@ -233,13 +259,19 @@ exports.sourceNodes = async (

let allData
try {
const data = await axios.get(urlJoin(baseUrl, apiBase), {
auth: basicAuth,
headers,
params,
})
const res = await requestQueue.push([
urlJoin(baseUrl, apiBase),
{
username: basicAuth.username,
password: basicAuth.password,
headers,
searchParams: params,
responseType: `json`,
cache,
},
])
allData = await Promise.all(
_.map(data.data.links, async (url, type) => {
_.map(res.body.links, async (url, type) => {
if (disallowedLinkTypes.includes(type)) return
if (!url) return
if (!type) return
Expand Down Expand Up @@ -276,31 +308,36 @@ exports.sourceNodes = async (

let d
try {
d = await axios.get(url, {
auth: basicAuth,
headers,
params,
})
d = await requestQueue.push([
url,
{
username: basicAuth.username,
password: basicAuth.password,
headers,
responseType: `json`,
cache,
},
])
} catch (error) {
if (error.response && error.response.status == 405) {
if (error.response && error.response.statusCode == 405) {
// The endpoint doesn't support the GET method, so just skip it.
return []
} else {
console.error(`Failed to fetch ${url}`, error.message)
console.log(error.data)
console.log(error)
throw error
}
}
data = data.concat(d.data.data)
data = data.concat(d.body.data)
// Add support for includes. Includes allow entity data to be expanded
// based on relationships. The expanded data is exposed as `included`
// in the JSON API response.
// See https://www.drupal.org/docs/8/modules/jsonapi/includes
if (d.data.included) {
data = data.concat(d.data.included)
if (d.body.included) {
data = data.concat(d.body.included)
}
if (d.data.links && d.data.links.next) {
data = await getNext(d.data.links.next, data)
if (d.body.links && d.body.links.next) {
data = await getNext(d.body.links.next, data)
}

return data
Expand Down Expand Up @@ -485,8 +522,8 @@ exports.pluginOptionsSchema = ({ Joi }) =>
`The path to the root of the JSONAPI — defaults to "jsonapi"`
),
basicAuth: Joi.object({
username: Joi.string(),
password: Joi.string(),
username: Joi.string().required(),
password: Joi.string().required(),
}).description(`Enables basicAuth`),
filters: Joi.object().description(
`Pass filters to the JSON API for specific collections`
Expand All @@ -496,6 +533,7 @@ exports.pluginOptionsSchema = ({ Joi }) =>
),
params: Joi.object().description(`Append optional GET params to requests`),
concurrentFileRequests: Joi.number().integer().default(20).min(1),
concurrentAPIRequests: Joi.number().integer().default(20).min(1),
disallowedLinkTypes: Joi.array().items(Joi.string()),
skipFileDownloads: Joi.boolean(),
fastBuilds: Joi.boolean(),
Expand Down
8 changes: 2 additions & 6 deletions renovate.json5
Expand Up @@ -16733,9 +16733,7 @@
],
"groupName": "minor and patch dependencies for gatsby-source-drupal",
"groupSlug": "gatsby-source-drupal-prod-minor",
"matchPackageNames": [
"axios"
],
"matchPackageNames": [],
"matchUpdateTypes": [
"patch"
],
Expand Down Expand Up @@ -16768,9 +16766,7 @@
],
"groupName": "major dependencies for gatsby-source-drupal",
"groupSlug": "gatsby-source-drupal-prod-major",
"matchPackageNames": [
"axios"
],
"matchPackageNames": [],
"matchUpdateTypes": [
"major",
"minor"
Expand Down

0 comments on commit b0de8f8

Please sign in to comment.