perf(benchmarks): add gabe-csv-markdown benchmark (#26977)
pvdz committed Sep 21, 2020
1 parent 8ad565f commit 866abdf
Showing 8 changed files with 96 additions and 90 deletions.
10 changes: 5 additions & 5 deletions benchmarks/gabe-csv-markdown/README.md
@@ -1,10 +1,10 @@
# Markdown Benchmark; CSV+text version
# Baseline Gatsby Benchmark: csv + markdown

This is a baseline benchmark for tracking CSV plaintext performance in the Gabe project.
This is a baseline benchmark site in the Gabe project.

This will produce the same site as `gabe-markdown` without using any markdown. It also generates one giant csv file containing all the data, rather than an individual file per page.
This site in particular tracks Markdown performance when sourcing from a single CSV file.

The site can generate an arbitrary amount of super simple pages. Each page has a small header, a quote, and two small paragraphs of random text. No images, because that's a fixed cost we're not interested in.
The site can generate an arbitrary number of super simple pages. Each page has a small header, a quote, and two small paragraphs of random text. No images, because we want to benchmark Markdown.

## Install

@@ -21,7 +21,7 @@ N=1000 M=2 yarn bench
- `N=1000`: instructs the run to build a site of 1000 pages
- `M=2`: instructs Node.js to use up to 2 GB of memory for its long-term storage (the old-space heap); see the sketch after this list
- Deletes generated files from the previous run
- Generates a new `gendata.csv` file containing `N` rows, each row being one page with pseudo-random content
- Generates `N` pages with pseudo-random content
- Runs `gatsby clean`
- Runs `gatsby build`
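
For reference, a sketch of how the two knobs are consumed, based on `gen.js` and the package.json `bench` script elsewhere in this commit:

```js
// N is read by gen.js to decide how many rows (pages) to write into gendata.csv.
const N = parseInt(process.env.N, 10) || 100

// M is interpolated into the build step as `--max_old_space_size=${M:-2}000`,
// so M=2 caps the Node.js old-space heap at roughly 2000 MB during `gatsby build`.
console.log("Would generate " + N + " articles")
```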

1 change: 1 addition & 0 deletions benchmarks/gabe-csv-markdown/gatsby-browser.js
@@ -0,0 +1 @@
// empty.
5 changes: 3 additions & 2 deletions benchmarks/gabe-csv-markdown/gatsby-config.js
@@ -5,13 +5,14 @@ module.exports = {
author: "Bob the Blogger",
},
plugins: [
`gatsby-transformer-csv`,
`gatsby-transformer-remark`,
{
resolve: `gatsby-source-filesystem`,
options: {
name: `blurp`,
path: __dirname + '/gendata.csv',
},
},
`gatsby-transformer-remark`,
`gatsby-transformer-csv`,
],
}
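
Two things worth noting about this config: `gatsby-transformer-csv` turns each row of `gendata.csv` into a `GendataCsv` node, and `gatsby-transformer-remark` only picks up nodes whose `internal.mediaType` is `text/markdown`; the bridge between the two is the `onCreateNode` hook in `gatsby-node.js` below. As a small style sketch (not part of this commit), the CSV path could equally be built with `path.join` rather than string concatenation:

```js
// Sketch only: the same filesystem source entry, with path.join instead of
// string concatenation for the gendata.csv path. Behaviour is equivalent.
const path = require(`path`)

module.exports = {
  plugins: [
    `gatsby-transformer-csv`,
    `gatsby-transformer-remark`,
    {
      resolve: `gatsby-source-filesystem`,
      options: {
        name: `blurp`,
        path: path.join(__dirname, `gendata.csv`),
      },
    },
  ],
}
```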
36 changes: 21 additions & 15 deletions benchmarks/gabe-csv-markdown/gatsby-node.js
@@ -4,15 +4,18 @@ const { createFilePath } = require(`gatsby-source-filesystem`)
exports.createPages = async ({ graphql, actions }) => {
const { createPage } = actions
const blogPost = path.resolve(`./src/templates/blog-post.js`)

const result = await graphql(
`
{
allGendataCsv (sort: { fields: [date], order: DESC }) {
allMarkdownRemark(sort: null) {
edges {
node {
id
slug
title
frontmatter {
slug
title
}
}
}
}
@@ -25,17 +25,17 @@ exports.createPages = async ({ graphql, actions }) => {
}

// Create blog posts pages.
const posts = result.data.allGendataCsv.edges
const posts = result.data.allMarkdownRemark.edges

posts.forEach((post, index) => {
const previous = index === posts.length - 1 ? null : posts[index + 1].node
const next = index === 0 ? null : posts[index - 1].node

createPage({
path: post.node.slug,
path: post.node.frontmatter.slug,
component: blogPost,
context: {
slug: post.node.slug,
slug: post.node.frontmatter.slug,
id: post.node.id,
previous,
next,
@@ -44,16 +44,19 @@ exports.createPages = async ({ graphql, actions }) => {
})
}

// Use this to keep in sync with markdown benchmark. TODO: drop this and see the difference.
exports.onCreateNode = ({ node, actions, getNode }) => {
const { createNodeField } = actions
exports.onCreateNode = ({ node, actions }) => {
const { createNode } = actions

if (node.internal.type === `DataCsv`) {
createNodeField({
name: `slug2`,
node,
value: './' + node.slug,
if (node.internal.type === `GendataCsv`) {
createNode({
id: `${node.id}-MarkdownProxy`,
parent: node.id,
internal: {
type: `MarkdownProxy`,
mediaType: "text/markdown",
content: node.articleContent,
contentDigest: String(Math.random()),
},
})
}
}
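
One observation on the hook above: `contentDigest` is set to `String(Math.random())`, so every proxy node looks changed on every build, which bypasses Gatsby's node cache for these nodes (plausibly the intent for a benchmark that measures full builds). For comparison, a minimal sketch of a cache-friendly variant, assuming the same `GendataCsv` row shape, using the `createNodeId` and `createContentDigest` helpers Gatsby passes to `onCreateNode`:

```js
// Sketch only, not part of this commit: same proxy node, but with a stable id
// and a digest derived from the row's content so unchanged rows can be reused
// from Gatsby's cache between builds.
exports.onCreateNode = ({ node, actions, createNodeId, createContentDigest }) => {
  const { createNode } = actions

  if (node.internal.type === `GendataCsv`) {
    createNode({
      id: createNodeId(`${node.id}-MarkdownProxy`),
      parent: node.id,
      internal: {
        type: `MarkdownProxy`,
        mediaType: `text/markdown`,
        content: node.articleContent,
        contentDigest: createContentDigest(node.articleContent),
      },
    })
  }
}
```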

54 changes: 29 additions & 25 deletions benchmarks/gabe-csv-markdown/gen.js
@@ -7,44 +7,48 @@ console.log("Start of gen")
const N = parseInt(process.env.N, 10) || 100
const FILE = path.resolve("gendata.csv")

// We may want to tweak this a little, but for this purpose the CSV has one column of interest: the full page contents (the remaining columns are dummy values)
// We then hand that content off to the markdown transformer

console.log("Now generating " + N + " articles into", FILE)
fs.writeFileSync(FILE, "articleNumber,title,description,slug,date,tags,body\n")
fs.writeFileSync(FILE, "articleContent,a,b,c\n")

function createArticle(n) {
const title = faker.lorem.sentence()
const slug = faker.helpers.slugify(title).toLowerCase()
const desc = faker.lorem.sentence()
const slug = faker.helpers.slugify(title).toLowerCase()
const date = faker.date.recent(1000).toISOString().slice(0, 10)
const tags = faker.random
.words(3)
.split(` `)
.map(w => `"${w}"`)
.join(`, `)

const pageContent = `---
articleNumber: ${n}
title: "${title.replace(/"/g, '\\"')}"
description: "${desc.replace(/"/g, '\\"')}"
slug: '${slug}'
date: ${date}
tags: [${tags}]
---
# ${title}
> ${desc}
${faker.lorem.paragraphs(2)}
`

// Note: you can only escape double quotes (by doubling them, not by backslash)
// any other content needs to be wrapped in double quotes and is consumed as-is (including newlines and commas)
fs.appendFileSync(
FILE,
[
// 'a','b','c','d','e','f', 'g'
String(n),
title,
desc,
slug,
date,
tags,
`
<h1>${title}</h1>
<blockquote>${desc}</blockquote>
<p>${faker.lorem.paragraphs(1)}</p>
<p>${faker.lorem.paragraphs(1)}</p>
`,
]
.map(s =>
s
.trim()
// Need to escape newlines and commas
.replace(/,/g, "\\,")
.replace(/\n/g, "") // html don't care about newlines
)
.join(",") + "\n"

'"' + pageContent
.trim()
.replace(/"/g, '""')
+ '",1,2,3' +
"\n" // markdown does care about newlines
)
}
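
The quoting rule from the comment above is what lets a multi-line markdown document live inside a single CSV cell. A hypothetical helper (not part of the commit) that captures it:

```js
// Wrap a field in double quotes and escape embedded quotes by doubling them,
// so commas and newlines inside the field survive the CSV round-trip.
function csvQuote(field) {
  return '"' + String(field).replace(/"/g, '""') + '"'
}

// csvQuote('He said "hi",\nthen left') === '"He said ""hi"",\nthen left"'
```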

7 changes: 4 additions & 3 deletions benchmarks/gabe-csv-markdown/package.json
@@ -1,12 +1,12 @@
{
"name": "gabe-csv-text",
"name": "gabe-csv-markdown",
"private": true,
"description": "Benchmark site for testing baseline csv+plaintext perf",
"description": "Benchmark site for testing baseline markdown perf when sourcing all pages through one CSV file",
"author": "Peter van der Zee <pvdz@github>",
"version": "0.1.0",
"license": "MIT",
"scripts": {
"bench": "rm -rf generated_articles; gatsby clean; N=${N:-512} node gen.js; CI=1 node --max_old_space_size=${M:-2}000 node_modules/.bin/gatsby build",
"bench": "rm -rf gendata.csv; gatsby clean; N=${N:-512} node gen.js; CI=1 node --max_old_space_size=${M:-2}000 node_modules/.bin/gatsby build",
"build": "gatsby build",
"clean": "gatsby clean",
"develop": "gatsby develop",
@@ -32,6 +32,7 @@
"gatsby": "^2",
"gatsby-source-filesystem": "^2",
"gatsby-transformer-csv": "^2",
"gatsby-transformer-remark": "^2",
"react": "^16.12.0",
"react-dom": "^16.12.0"
}
28 changes: 14 additions & 14 deletions benchmarks/gabe-csv-markdown/src/pages/index.js
@@ -8,31 +8,31 @@ class BlogIndex extends React.Component {
render() {
const { data } = this.props
const siteTitle = data.site.siteMetadata.title
const posts = data.allGendataCsv.edges
const posts = data.allMarkdownRemark.edges

return (
<Layout location={this.props.location} title={siteTitle}>
<Bio />
{posts.map(({ node }) => {
const title = node.title || node.slug
const title = node.frontmatter.title || node.frontmatter.slug
return (
<article key={node.slug}>
<article key={node.frontmatter.slug}>
<header>
<h3
style={{
marginBottom: '5px',
}}
>
<Link style={{ boxShadow: `none` }} to={node.slug}>
<Link style={{ boxShadow: `none` }} to={'/' + node.frontmatter.slug}>
{title}
</Link>
</h3>
<small>{node.date}</small>
<small>{node.frontmatter.date}</small>
</header>
<section>
<p
dangerouslySetInnerHTML={{
__html: node.description,
__html: node.frontmatter.description || node.excerpt,
}}
/>
</section>
@@ -53,16 +53,16 @@ export const pageQuery = graphql`
title
}
}
allGendataCsv(limit: 100) {
allMarkdownRemark(limit: 100) {
edges {
node {
articleNumber
title
description
slug
date
tags
body
excerpt
frontmatter {
slug
date(formatString: "MMMM DD, YYYY")
title
description
}
}
}
}
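
To make the query change concrete, the index page now iterates over nodes shaped roughly like the following sketch (shape derived from the query above; all values invented):

```js
// Illustrative only: one `edges[i].node` as returned by allMarkdownRemark above.
const exampleNode = {
  excerpt: "Quia quod est voluptas rerum aut…",
  frontmatter: {
    slug: "voluptas-rerum-aut",
    date: "September 21, 2020",
    title: "Voluptas rerum aut",
    description: "Quia quod est.",
  },
}
```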
45 changes: 19 additions & 26 deletions benchmarks/gabe-csv-markdown/src/templates/blog-post.js
@@ -6,8 +6,7 @@ import Layout from "../components/layout"

class BlogPostTemplate extends React.Component {
render() {
const post = this.props.data
const node = post.allGendataCsv.edges[0].node
const post = this.props.data.markdownRemark
const siteTitle = this.props.data.site.siteMetadata.title
const { previous, next } = this.props.pageContext

@@ -21,18 +20,18 @@
marginBottom: 0,
}}
>
{node.title}
{post.frontmatter.title}
</h1>
<p
style={{
display: `block`,
marginBottom: '5px',
}}
>
{node.date}
{post.frontmatter.date}
</p>
</header>
<section dangerouslySetInnerHTML={{ __html: node.body }} />
<section dangerouslySetInnerHTML={{ __html: post.html }} />
<hr
style={{
marginBottom: '5px',
@@ -55,15 +54,15 @@
>
<li>
{previous && (
<Link to={'../' + previous.slug} rel="prev">
{previous.title}
<Link to={'/' + previous.frontmatter.slug} rel="prev">
{previous.frontmatter.title}
</Link>
)}
</li>
<li>
{next && (
<Link to={'../' + next.slug} rel="next">
{next.title}
<Link to={'/' + next.frontmatter.slug} rel="next">
{next.frontmatter.title}
</Link>
)}
</li>
@@ -77,28 +76,22 @@
export default BlogPostTemplate

export const pageQuery = graphql`
query($id: String!) {
query BlogPostById($id: String!) {
site {
siteMetadata {
title
}
}
allGendataCsv(
filter: {
id: { eq: $id }
}
) {
edges {
node {
articleNumber
title
description
slug
date
tags
body
}
markdownRemark(id: { eq: $id }) {
id
excerpt(pruneLength: 160)
html
frontmatter {
slug
title
date(formatString: "MMMM DD, YYYY")
description
}
}
}
`;
`
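
The `$id` variable in `BlogPostById` is supplied through the page context: each key passed to `createPage`'s `context` becomes a GraphQL variable available to that page's query. Condensed from `gatsby-node.js` earlier in this commit:

```js
// Condensed from gatsby-node.js above: `context.id` is what `$id` receives.
createPage({
  path: post.node.frontmatter.slug,
  component: blogPost,
  context: {
    id: post.node.id,
    slug: post.node.frontmatter.slug,
    previous,
    next,
  },
})
```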
