Skip to content

Commit e1324a1

Browse files
authored Mar 24, 2023
fix: add sharding to s3 blockstore (#202)
To avoid non-obvious performance footguns, shard by default.
1 parent 405250f commit e1324a1

File tree

3 files changed

+131
-83
lines changed

3 files changed

+131
-83
lines changed
 

‎packages/blockstore-s3/src/index.ts

+12-37
Original file line numberDiff line numberDiff line change
@@ -13,43 +13,29 @@ import {
1313
DeleteObjectCommand,
1414
ListObjectsV2Command
1515
} from '@aws-sdk/client-s3'
16-
import { CID } from 'multiformats/cid'
17-
import { base32upper } from 'multiformats/bases/base32'
18-
import type { MultibaseCodec } from 'multiformats/bases/interface'
16+
import type { CID } from 'multiformats/cid'
17+
import { NextToLast, ShardingStrategy } from './sharding.js'
1918

2019
export interface S3DatastoreInit {
21-
/**
22-
* An optional path to use within the bucket for all files - this setting can
23-
* affect S3 performance as it does internal sharding based on 'prefixes' -
24-
* these can be delimited by '/' so it's often better to wrap this datastore in
25-
* a sharding datastore which will generate prefixed datastore keys for you.
26-
*
27-
* See - https://docs.aws.amazon.com/AmazonS3/latest/userguide/optimizing-performance.html
28-
* and https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-prefixes.html
29-
*/
30-
path?: string
31-
3220
/**
3321
* Whether to try to create the bucket if it is missing when `.open` is called
3422
*/
3523
createIfMissing?: boolean
3624

3725
/**
38-
* The multibase codec to use - nb. should be case insensitive.
39-
* default: base32upper
26+
* Control how CIDs map to paths and back
4027
*/
41-
base?: MultibaseCodec<string>
28+
shardingStrategy?: ShardingStrategy
4229
}
4330

4431
/**
4532
* A blockstore backed by AWS S3
4633
*/
4734
export class S3Blockstore extends BaseBlockstore {
48-
public path?: string
4935
public createIfMissing: boolean
5036
private readonly s3: S3
5137
private readonly bucket: string
52-
private readonly base: MultibaseCodec<string>
38+
private readonly shardingStrategy: ShardingStrategy
5339

5440
constructor (s3: S3, bucket: string, init?: S3DatastoreInit) {
5541
super()
@@ -62,21 +48,10 @@ export class S3Blockstore extends BaseBlockstore {
6248
throw new Error('An bucket must be supplied. See the datastore-s3 README for examples.')
6349
}
6450

65-
this.path = init?.path
6651
this.s3 = s3
6752
this.bucket = bucket
6853
this.createIfMissing = init?.createIfMissing ?? false
69-
this.base = init?.base ?? base32upper
70-
}
71-
72-
/**
73-
* Returns the full key which includes the path to the ipfs store
74-
*/
75-
_getFullKey (cid: CID): string {
76-
// Avoid absolute paths with s3
77-
const str = this.base.encoder.encode(cid.multihash.bytes)
78-
79-
return [this.path, str].filter(Boolean).join('/').replace(/\/\/+/g, '/')
54+
this.shardingStrategy = init?.shardingStrategy ?? new NextToLast()
8055
}
8156

8257
/**
@@ -88,7 +63,7 @@ export class S3Blockstore extends BaseBlockstore {
8863
await this.s3.send(
8964
new PutObjectCommand({
9065
Bucket: this.bucket,
91-
Key: this._getFullKey(key),
66+
Key: this.shardingStrategy.encode(key),
9267
Body: val
9368
}), {
9469
abortSignal: options?.signal
@@ -110,7 +85,7 @@ export class S3Blockstore extends BaseBlockstore {
11085
const data = await this.s3.send(
11186
new GetObjectCommand({
11287
Bucket: this.bucket,
113-
Key: this._getFullKey(key)
88+
Key: this.shardingStrategy.encode(key)
11489
}), {
11590
abortSignal: options?.signal
11691
}
@@ -154,7 +129,7 @@ export class S3Blockstore extends BaseBlockstore {
154129
await this.s3.send(
155130
new HeadObjectCommand({
156131
Bucket: this.bucket,
157-
Key: this._getFullKey(key)
132+
Key: this.shardingStrategy.encode(key)
158133
}), {
159134
abortSignal: options?.signal
160135
}
@@ -185,7 +160,7 @@ export class S3Blockstore extends BaseBlockstore {
185160
await this.s3.send(
186161
new DeleteObjectCommand({
187162
Bucket: this.bucket,
188-
Key: this._getFullKey(key)
163+
Key: this.shardingStrategy.encode(key)
189164
}), {
190165
abortSignal: options?.signal
191166
}
@@ -224,7 +199,7 @@ export class S3Blockstore extends BaseBlockstore {
224199
}
225200

226201
// Remove the path from the key
227-
const cid = CID.decode(this.base.decoder.decode(d.Key.slice((this.path ?? '').length)))
202+
const cid = this.shardingStrategy.decode(d.Key)
228203

229204
yield {
230205
cid,
@@ -257,7 +232,7 @@ export class S3Blockstore extends BaseBlockstore {
257232
await this.s3.send(
258233
new HeadObjectCommand({
259234
Bucket: this.bucket,
260-
Key: this.path ?? ''
235+
Key: ''
261236
}), {
262237
abortSignal: options?.signal
263238
}
+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import { CID } from 'multiformats/cid'
2+
import { base32upper } from 'multiformats/bases/base32'
3+
import type { MultibaseCodec } from 'multiformats/bases/interface'
4+
5+
/**
 * Describes how a CID is turned into a storage path and how such a path is
 * turned back into a CID.
 */
export interface ShardingStrategy {
  /**
   * Converts a CID into the path (object key) its block is stored under
   */
  encode: (cid: CID) => string

  /**
   * Converts a path previously produced by `encode` back into a CID
   */
  decode: (path: string) => CID

  /**
   * The file extension the strategy uses for stored blocks
   */
  extension: string
}
10+
11+
/**
 * Options accepted by the `NextToLast` sharding strategy
 */
export interface NextToLastInit {
  /**
   * The multibase codec used to turn the multihash into a string - nb. should
   * be case insensitive.
   * default: base32upper
   */
  base?: MultibaseCodec<string>

  /**
   * The file extension appended to every encoded name. default: '.data'
   */
  extension?: string

  /**
   * How many characters to take from the end of the encoded multihash to
   * form the shard directory name. default: 2
   */
  prefixLength?: number
}
28+
29+
/**
 * A sharding strategy that takes the last few characters of the multibase
 * encoded multihash of a CID and uses them as the directory to store the
 * block in, producing paths shaped like `<suffix>/<encoded><extension>`.
 * This prevents all blocks being stored under a single prefix.
 *
 * Only the multihash is encoded, not the whole CID, and `decode` hands the
 * decoded bytes straight to `CID.decode`.
 * NOTE(review): presumably this only round-trips for multihashes that
 * `CID.decode` accepts on their own (CIDv0-style sha2-256) - confirm before
 * relying on it with other hash functions.
 */
export class NextToLast implements ShardingStrategy {
  public extension: string
  private readonly prefixLength: number
  private readonly base: MultibaseCodec<string>

  /**
   * @param init - optional overrides; defaults are extension `.data`,
   * prefixLength `2` and the `base32upper` codec
   */
  constructor (init: NextToLastInit = {}) {
    this.extension = init.extension ?? '.data'
    this.prefixLength = init.prefixLength ?? 2
    this.base = init.base ?? base32upper
  }

  /**
   * Builds the storage path for a CID.
   *
   * @param cid - the CID whose multihash is encoded into the path
   * @returns `<suffix>/<encoded><extension>`, or just `<encoded><extension>`
   * when `prefixLength` is not a positive number
   */
  encode (cid: CID): string {
    const str = this.base.encoder.encode(cid.multihash.bytes)
    const fileName = `${str}${this.extension}`

    // Without a positive prefix length there is no shard directory. Emitting
    // `/${fileName}` would create a key that starts with '/', i.e. an
    // absolute-looking path with an empty first segment.
    if (!(this.prefixLength > 0)) {
      return fileName
    }

    const prefix = str.substring(str.length - this.prefixLength)

    return `${prefix}/${fileName}`
  }

  /**
   * Recovers a CID from a path produced by `encode`. Only the final path
   * segment is inspected, so any leading directories are ignored.
   *
   * @param str - the path to decode
   * @returns the CID decoded from the file name
   * @throws Error with message 'Invalid path' when the path has no file name
   * left once the extension has been removed
   */
  decode (str: string): CID {
    const lastSegment = str.split('/').pop() ?? ''

    // The extension is optional on the way in: strip it only when present
    const encoded = lastSegment.endsWith(this.extension)
      ? lastSegment.substring(0, lastSegment.length - this.extension.length)
      : lastSegment

    if (encoded === '') {
      throw new Error('Invalid path')
    }

    return CID.decode(this.base.decoder.decode(encoded))
  }
}
67+
68+
/**
 * Options accepted by the `FlatDirectory` sharding strategy
 */
export interface FlatDirectoryInit {
  /**
   * The file extension to use. default: '.data'
   */
  extension?: string

  /**
   * Has no effect: `FlatDirectory` does not shard, so it never reads this
   * value. The property is kept so existing option objects still type-check.
   */
  prefixLength?: number

  /**
   * The multibase codec to use - nb. should be case insensitive.
   * default: base32upper
   */
  base?: MultibaseCodec<string>
}
85+
86+
/**
87+
* A sharding strategy that does not do any sharding and stores all files
88+
* in one directory. Only for testing, do not use in production.
89+
*/
90+
export class FlatDirectory implements ShardingStrategy {
91+
public extension: string
92+
private readonly base: MultibaseCodec<string>
93+
94+
constructor (init: NextToLastInit = {}) {
95+
this.extension = init.extension ?? '.data'
96+
this.base = init.base ?? base32upper
97+
}
98+
99+
encode (cid: CID): string {
100+
const str = this.base.encoder.encode(cid.multihash.bytes)
101+
102+
return `${str}${this.extension}`
103+
}
104+
105+
decode (str: string): CID {
106+
let fileName = str.split('/').pop()
107+
108+
if (fileName == null) {
109+
throw new Error('Invalid path')
110+
}
111+
112+
if (fileName.endsWith(this.extension)) {
113+
fileName = fileName.substring(0, fileName.length - this.extension.length)
114+
}
115+
116+
return CID.decode(this.base.decoder.decode(fileName))
117+
}
118+
}

‎packages/blockstore-s3/test/index.spec.ts

+1-46
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import { expect } from 'aegir/chai'
44
import sinon from 'sinon'
5-
import { CreateBucketCommand, PutObjectCommand, HeadObjectCommand, S3, GetObjectCommand } from '@aws-sdk/client-s3'
5+
import { CreateBucketCommand, HeadObjectCommand, S3 } from '@aws-sdk/client-s3'
66
import defer from 'p-defer'
77
import { interfaceBlockstoreTests } from 'interface-blockstore-tests'
88
import { CID } from 'multiformats/cid'
@@ -43,25 +43,6 @@ describe('S3Blockstore', () => {
4343
})
4444

4545
describe('put', () => {
46-
it('should include the path in the key', async () => {
47-
const s3 = new S3({ region: 'REGION' })
48-
const store = new S3Blockstore(s3, 'test', {
49-
path: '.ipfs/datastore'
50-
})
51-
52-
const deferred = defer<PutObjectCommand>()
53-
54-
sinon.replace(s3, 'send', (command: PutObjectCommand) => {
55-
deferred.resolve(command)
56-
return s3Resolve(null)
57-
})
58-
59-
await store.put(cid, new TextEncoder().encode('test data'))
60-
61-
const command = await deferred.promise
62-
expect(command).to.have.nested.property('input.Key', '.ipfs/datastore/BCIQPGZJ6QLZOFG3OP45NLMSJUWGJCO72QQKHLDTB6FXIB6BDSLRQYLY')
63-
})
64-
6546
it('should return a standard error when the put fails', async () => {
6647
const s3 = new S3({ region: 'REGION' })
6748
const store = new S3Blockstore(s3, 'test')
@@ -80,32 +61,6 @@ describe('S3Blockstore', () => {
8061
})
8162

8263
describe('get', () => {
83-
it('should include the path in the fetch key', async () => {
84-
const s3 = new S3({ region: 'REGION' })
85-
const store = new S3Blockstore(s3, 'test', {
86-
path: '.ipfs/datastore'
87-
})
88-
const buf = new TextEncoder().encode('test')
89-
90-
const deferred = defer<GetObjectCommand>()
91-
92-
sinon.replace(s3, 'send', (command: any) => {
93-
if (command.constructor.name === 'GetObjectCommand') {
94-
deferred.resolve(command)
95-
return s3Resolve({ Body: buf })
96-
}
97-
98-
return s3Reject(new S3Error('UnknownCommand'))
99-
})
100-
101-
const value = await store.get(cid)
102-
103-
expect(value).to.equalBytes(buf)
104-
105-
const getObjectCommand = await deferred.promise
106-
expect(getObjectCommand).to.have.nested.property('input.Key', '.ipfs/datastore/BCIQPGZJ6QLZOFG3OP45NLMSJUWGJCO72QQKHLDTB6FXIB6BDSLRQYLY')
107-
})
108-
10964
it('should return a standard not found error code if the key isn\'t found', async () => {
11065
const s3 = new S3({ region: 'REGION' })
11166
const store = new S3Blockstore(s3, 'test')

0 commit comments

Comments
 (0)
Please sign in to comment.