perf(benchmarks): add gabe-csv-markdown benchmark (#26977)
pvdz committed Sep 21, 2020
1 parent 8ad565f commit 866abdf
Showing 8 changed files with 96 additions and 90 deletions.
10 changes: 5 additions & 5 deletions benchmarks/gabe-csv-markdown/README.md
@@ -1,10 +1,10 @@
# Markdown Benchmark; CSV+text version
# Baseline Gatsby Benchmark: csv + markdown

This is a baseline benchmark for tracking CSV plaintext performance in the Gabe project.
This is a baseline benchmark site in the Gabe project.

This will produce the same site as `gabe-markdown` without using any markdown. It also generates one giant csv file containing all the data, rather than an individual file per page.
This site in particular tracks Markdown performance when sourcing from a single CSV file.

The site can generate an arbitrary amount of super simple pages. Each page has a small header, a quote, and two small paragraphs of random text. No images, because that's a fixed cost we're not interested in.
The site can generate an arbitrary number of super simple pages. Each page has a small header, a quote, and two small paragraphs of random text. No images, because we want to benchmark Markdown.

## Install

@@ -21,7 +21,7 @@ N=1000 M=2 yarn bench
- `N=1000`: instructs the run to build a site of 1000 pages
- `M=2`: instructs Node.js to use up to 2 GB of memory for its long-term storage (the old-space heap); see the sketch after this list
- Deletes generated files from the previous run
- Generates a new `gendata.csv` file containing `N` rows, each row being one page with pseudo-random content
- Generates `N` pages with pseudo-random content
- Runs `gatsby clean`
- Runs `gatsby build`
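
For reference, a sketch of how the two knobs are consumed, based on `gen.js` and the package.json `bench` script elsewhere in this commit:

```js
// N is read by gen.js to decide how many rows (pages) to write into gendata.csv.
const N = parseInt(process.env.N, 10) || 100

// M is interpolated into the build step as `--max_old_space_size=${M:-2}000`,
// so M=2 caps the Node.js old-space heap at roughly 2000 MB during `gatsby build`.
console.log("Would generate " + N + " articles")
```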

1 change: 1 addition & 0 deletions benchmarks/gabe-csv-markdown/gatsby-browser.js
@@ -0,0 +1 @@
// empty.
5 changes: 3 additions & 2 deletions benchmarks/gabe-csv-markdown/gatsby-config.js
@@ -5,13 +5,14 @@ module.exports = {
author: "Bob the Blogger",
},
plugins: [
`gatsby-transformer-csv`,
`gatsby-transformer-remark`,
{
resolve: `gatsby-source-filesystem`,
options: {
name: `blurp`,
path: __dirname + '/gendata.csv',
},
},
`gatsby-transformer-remark`,
`gatsby-transformer-csv`,
],
}
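
Two things worth noting about this config: `gatsby-transformer-csv` turns each row of `gendata.csv` into a `GendataCsv` node, and `gatsby-transformer-remark` only picks up nodes whose `internal.mediaType` is `text/markdown`; the bridge between the two is the `onCreateNode` hook in `gatsby-node.js` below. As a small style sketch (not part of this commit), the CSV path could equally be built with `path.join` rather than string concatenation:

```js
// Sketch only: the same filesystem source entry, with path.join instead of
// string concatenation for the gendata.csv path. Behaviour is equivalent.
const path = require(`path`)

module.exports = {
  plugins: [
    `gatsby-transformer-csv`,
    `gatsby-transformer-remark`,
    {
      resolve: `gatsby-source-filesystem`,
      options: {
        name: `blurp`,
        path: path.join(__dirname, `gendata.csv`),
      },
    },
  ],
}
```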
36 changes: 21 additions & 15 deletions benchmarks/gabe-csv-markdown/gatsby-node.js
@@ -4,15 +4,18 @@ const { createFilePath } = require(`gatsby-source-filesystem`)
exports.createPages = async ({ graphql, actions }) => {
const { createPage } = actions
const blogPost = path.resolve(`./src/templates/blog-post.js`)

const result = await graphql(
`
{
allGendataCsv (sort: { fields: [date], order: DESC }) {
allMarkdownRemark(sort: null) {
edges {
node {
id
slug
title
frontmatter {
slug
title
}
}
}
}
@@ -25,17 +25,17 @@ exports.createPages = async ({ graphql, actions }) => {
}

// Create blog posts pages.
const posts = result.data.allGendataCsv.edges
const posts = result.data.allMarkdownRemark.edges

posts.forEach((post, index) => {
const previous = index === posts.length - 1 ? null : posts[index + 1].node
const next = index === 0 ? null : posts[index - 1].node

createPage({
path: post.node.slug,
path: post.node.frontmatter.slug,
component: blogPost,
context: {
slug: post.node.slug,
slug: post.node.frontmatter.slug,
id: post.node.id,
previous,
next,
@@ -44,16 +44,19 @@ exports.createPages = async ({ graphql, actions }) => {
})
}

// Use this to keep in sync with markdown benchmark. TODO: drop this and see the difference.
exports.onCreateNode = ({ node, actions, getNode }) => {
const { createNodeField } = actions
exports.onCreateNode = ({ node, actions }) => {
const { createNode } = actions

if (node.internal.type === `DataCsv`) {
createNodeField({
name: `slug2`,
node,
value: './' + node.slug,
if (node.internal.type === `GendataCsv`) {
createNode({
id: `${node.id}-MarkdownProxy`,
parent: node.id,
internal: {
type: `MarkdownProxy`,
mediaType: "text/markdown",
content: node.articleContent,
contentDigest: String(Math.random()),
},
})
}
}
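
One observation on the hook above: `contentDigest` is set to `String(Math.random())`, so every proxy node looks changed on every build, which bypasses Gatsby's node cache for these nodes (plausibly the intent for a benchmark that measures full builds). For comparison, a minimal sketch of a cache-friendly variant, assuming the same `GendataCsv` row shape, using the `createNodeId` and `createContentDigest` helpers Gatsby passes to `onCreateNode`:

```js
// Sketch only, not part of this commit: same proxy node, but with a stable id
// and a digest derived from the row's content so unchanged rows can be reused
// from Gatsby's cache between builds.
exports.onCreateNode = ({ node, actions, createNodeId, createContentDigest }) => {
  const { createNode } = actions

  if (node.internal.type === `GendataCsv`) {
    createNode({
      id: createNodeId(`${node.id}-MarkdownProxy`),
      parent: node.id,
      internal: {
        type: `MarkdownProxy`,
        mediaType: `text/markdown`,
        content: node.articleContent,
        contentDigest: createContentDigest(node.articleContent),
      },
    })
  }
}
```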

54 changes: 29 additions & 25 deletions benchmarks/gabe-csv-markdown/gen.js
@@ -7,44 +7,48 @@ console.log("Start of gen")
const N = parseInt(process.env.N, 10) || 100
const FILE = path.resolve("gendata.csv")

// We may want to tweak this a little, but for this purpose the CSV has one column of interest: the full page contents (the remaining columns are dummy values)
// We then hand that content off to the markdown transformer

console.log("Now generating " + N + " articles into", FILE)
fs.writeFileSync(FILE, "articleNumber,title,description,slug,date,tags,body\n")
fs.writeFileSync(FILE, "articleContent,a,b,c\n")

function createArticle(n) {
const title = faker.lorem.sentence()
const slug = faker.helpers.slugify(title).toLowerCase()
const desc = faker.lorem.sentence()
const slug = faker.helpers.slugify(title).toLowerCase()
const date = faker.date.recent(1000).toISOString().slice(0, 10)
const tags = faker.random
.words(3)
.split(` `)
.map(w => `"${w}"`)
.join(`, `)

const pageContent = `---
articleNumber: ${n}
title: "${title.replace(/"/g, '\\"')}"
description: "${desc.replace(/"/g, '\\"')}"
slug: '${slug}'
date: ${date}
tags: [${tags}]
---
# ${title}
> ${desc}
${faker.lorem.paragraphs(2)}
`

// Note: you can only escape double quotes (by doubling them, not by backslash)
// any other content needs to be wrapped in double quotes and is consumed as-is (including newlines and commas)
fs.appendFileSync(
FILE,
[
// 'a','b','c','d','e','f', 'g'
String(n),
title,
desc,
slug,
date,
tags,
`
<h1>${title}</h1>
<blockquote>${desc}</blockquote>
<p>${faker.lorem.paragraphs(1)}</p>
<p>${faker.lorem.paragraphs(1)}</p>
`,
]
.map(s =>
s
.trim()
// Need to escape newlines and commas
.replace(/,/g, "\\,")
.replace(/\n/g, "") // html don't care about newlines
)
.join(",") + "\n"

'"' + pageContent
.trim()
.replace(/"/g, '""')
+ '",1,2,3' +
"\n" // markdown does care about newlines
)
}
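
The quoting rule from the comment above is what lets a multi-line markdown document live inside a single CSV cell. A hypothetical helper (not part of the commit) that captures it:

```js
// Wrap a field in double quotes and escape embedded quotes by doubling them,
// so commas and newlines inside the field survive the CSV round-trip.
function csvQuote(field) {
  return '"' + String(field).replace(/"/g, '""') + '"'
}

// csvQuote('He said "hi",\nthen left') === '"He said ""hi"",\nthen left"'
```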

7 changes: 4 additions & 3 deletions benchmarks/gabe-csv-markdown/package.json
@@ -1,12 +1,12 @@
{
"name": "gabe-csv-text",
"name": "gabe-csv-markdown",
"private": true,
"description": "Benchmark site for testing baseline csv+plaintext perf",
"description": "Benchmark site for testing baseline markdown perf when sourcing all pages through one CSV file",
"author": "Peter van der Zee <pvdz@github>",
"version": "0.1.0",
"license": "MIT",
"scripts": {
"bench": "rm -rf generated_articles; gatsby clean; N=${N:-512} node gen.js; CI=1 node --max_old_space_size=${M:-2}000 node_modules/.bin/gatsby build",
"bench": "rm -rf gendata.csv; gatsby clean; N=${N:-512} node gen.js; CI=1 node --max_old_space_size=${M:-2}000 node_modules/.bin/gatsby build",
"build": "gatsby build",
"clean": "gatsby clean",
"develop": "gatsby develop",
@@ -32,6 +32,7 @@
"gatsby": "^2",
"gatsby-source-filesystem": "^2",
"gatsby-transformer-csv": "^2",
"gatsby-transformer-remark": "^2",
"react": "^16.12.0",
"react-dom": "^16.12.0"
}
28 changes: 14 additions & 14 deletions benchmarks/gabe-csv-markdown/src/pages/index.js
@@ -8,31 +8,31 @@ class BlogIndex extends React.Component {
render() {
const { data } = this.props
const siteTitle = data.site.siteMetadata.title
const posts = data.allGendataCsv.edges
const posts = data.allMarkdownRemark.edges

return (
<Layout location={this.props.location} title={siteTitle}>
<Bio />
{posts.map(({ node }) => {
const title = node.title || node.slug
const title = node.frontmatter.title || node.frontmatter.slug
return (
<article key={node.slug}>
<article key={node.frontmatter.slug}>
<header>
<h3
style={{
marginBottom: '5px',
}}
>
<Link style={{ boxShadow: `none` }} to={node.slug}>
<Link style={{ boxShadow: `none` }} to={'/' + node.frontmatter.slug}>
{title}
</Link>
</h3>
<small>{node.date}</small>
<small>{node.frontmatter.date}</small>
</header>
<section>
<p
dangerouslySetInnerHTML={{
__html: node.description,
__html: node.frontmatter.description || node.excerpt,
}}
/>
</section>
@@ -53,16 +53,16 @@ export const pageQuery = graphql`
title
}
}
allGendataCsv(limit: 100) {
allMarkdownRemark(limit: 100) {
edges {
node {
articleNumber
title
description
slug
date
tags
body
excerpt
frontmatter {
slug
date(formatString: "MMMM DD, YYYY")
title
description
}
}
}
}
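
To make the query change concrete, the index page now iterates over nodes shaped roughly like the following sketch (shape derived from the query above; all values invented):

```js
// Illustrative only: one `edges[i].node` as returned by allMarkdownRemark above.
const exampleNode = {
  excerpt: "Quia quod est voluptas rerum aut…",
  frontmatter: {
    slug: "voluptas-rerum-aut",
    date: "September 21, 2020",
    title: "Voluptas rerum aut",
    description: "Quia quod est.",
  },
}
```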
45 changes: 19 additions & 26 deletions benchmarks/gabe-csv-markdown/src/templates/blog-post.js
@@ -6,8 +6,7 @@ import Layout from "../components/layout"

class BlogPostTemplate extends React.Component {
render() {
const post = this.props.data
const node = post.allGendataCsv.edges[0].node
const post = this.props.data.markdownRemark
const siteTitle = this.props.data.site.siteMetadata.title
const { previous, next } = this.props.pageContext

@@ -21,18 +20,18 @@
marginBottom: 0,
}}
>
{node.title}
{post.frontmatter.title}
</h1>
<p
style={{
display: `block`,
marginBottom: '5px',
}}
>
{node.date}
{post.frontmatter.date}
</p>
</header>
<section dangerouslySetInnerHTML={{ __html: node.body }} />
<section dangerouslySetInnerHTML={{ __html: post.html }} />
<hr
style={{
marginBottom: '5px',
@@ -55,15 +54,15 @@
>
<li>
{previous && (
<Link to={'../' + previous.slug} rel="prev">
{previous.title}
<Link to={'/' + previous.frontmatter.slug} rel="prev">
{previous.frontmatter.title}
</Link>
)}
</li>
<li>
{next && (
<Link to={'../' + next.slug} rel="next">
{next.title}
<Link to={'/' + next.frontmatter.slug} rel="next">
{next.frontmatter.title}
</Link>
)}
</li>
@@ -77,28 +76,22 @@
export default BlogPostTemplate

export const pageQuery = graphql`
query($id: String!) {
query BlogPostById($id: String!) {
site {
siteMetadata {
title
}
}
allGendataCsv(
filter: {
id: { eq: $id }
}
) {
edges {
node {
articleNumber
title
description
slug
date
tags
body
}
markdownRemark(id: { eq: $id }) {
id
excerpt(pruneLength: 160)
html
frontmatter {
slug
title
date(formatString: "MMMM DD, YYYY")
description
}
}
}
`;
`
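
The `$id` variable in `BlogPostById` is supplied through the page context: each key passed to `createPage`'s `context` becomes a GraphQL variable available to that page's query. Condensed from `gatsby-node.js` earlier in this commit:

```js
// Condensed from gatsby-node.js above: `context.id` is what `$id` receives.
createPage({
  path: post.node.frontmatter.slug,
  component: blogPost,
  context: {
    id: post.node.id,
    slug: post.node.frontmatter.slug,
    previous,
    next,
  },
})
```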
