This repository was archived by the owner on Feb 12, 2024. It is now read-only.

Commit 8a3ed19

Authored Nov 12, 2021
fix: do not lose files when writing files into subshards that contain other subshards (#3936)
When writing a file into a HAMT shard, we hash the filename to figure out where in the shard to place the file. If the hash means we end up adding the file to an existing subshard that also contains another subshard, we must populate that other subshard's children, otherwise they will not be present when we calculate the new CID for the subshard and they will be lost. Fixes #3921
1 parent 15184bf · commit 8a3ed19
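The new interface test below (in packages/interface-ipfs-core/src/files/write.js) pins the bug down. What follows is a minimal reproduction sketch under stated assumptions: a running js-ipfs node created with ipfs-core, and the full filename list from that test, which is what reliably produces a subshard that itself contains another subshard (shardSplitThreshold: 0 forces the directory to shard immediately):

// Reproduction sketch adapted from the test added in this commit; not the
// commit's own code. Fill in `files` from the interface test diff below.
import { create } from 'ipfs-core'

const ipfs = await create()
const data = Uint8Array.from([0, 1, 2])

await ipfs.files.mkdir('/hamttest-mfs')

const files = [/* the 14 filenames from the new interface test below */]

for (const path of files) {
  // shardSplitThreshold: 0 makes MFS shard the directory from the first entry
  await ipfs.files.write(`/hamttest-mfs/${path}`, data, { shardSplitThreshold: 0, create: true })
}

// Before this fix, recalculating the subshard's CID for the next write ignored
// the sibling subshard's children, so they were silently dropped.
await ipfs.files.write('/hamttest-mfs/supermodule_test', data, { shardSplitThreshold: 0, create: true })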

File tree

6 files changed: +130 -26 lines changed

.github/workflows/test.yml (+3 -3)

@@ -347,7 +347,7 @@ jobs:
           deps: ipfs-core@$PWD/packages/ipfs-core/dist
         - name: ipfs browser service worker
           repo: https://github.com/ipfs-examples/js-ipfs-browser-service-worker.git
-          deps: ipfs-core@$PWD/packages/ipfs-core/dist,ipfs-message-port-client/dist@$PWD/packages/ipfs-message-port-client/dist,ipfs-message-port-protocol@$PWD/packages/ipfs-message-port-protocol/dist,ipfs-message-port-server@$PWD/packages/ipfs-message-port-server/dist
+          deps: ipfs-core@$PWD/packages/ipfs-core/dist,ipfs-message-port-client@$PWD/packages/ipfs-message-port-client/dist,ipfs-message-port-protocol@$PWD/packages/ipfs-message-port-protocol/dist,ipfs-message-port-server@$PWD/packages/ipfs-message-port-server/dist
         - name: ipfs browser sharing across tabs
           repo: https://github.com/ipfs-examples/js-ipfs-browser-sharing-node-across-tabs.git
           deps: ipfs-core@$PWD/packages/ipfs-core/dist,ipfs-message-port-client@$PWD/packages/ipfs-message-port-client/dist,ipfs-message-port-server@$PWD/packages/ipfs-message-port-server/dist
@@ -365,7 +365,7 @@ jobs:
           deps: ipfs-core@$PWD/packages/ipfs-core/dist
         - name: ipfs custom ipfs repo
           repo: https://github.com/ipfs-examples/js-ipfs-custom-ipfs-repo.git
-          deps: ipfs@$PWD/packages/ipfs/dist
+          deps: ipfs-core@$PWD/packages/ipfs-core/dist
         - name: ipfs custom ipld formats
           repo: https://github.com/ipfs-examples/js-ipfs-custom-ipld-formats.git
           deps: ipfs-core@$PWD/packages/ipfs-core/dist,ipfs-daemon@$PWD/packages/ipfs-daemon/dist,ipfs-http-client@$PWD/packages/ipfs-http-client/dist
@@ -380,7 +380,7 @@ jobs:
           deps: ipfs-http-client@$PWD/packages/ipfs-http-client/dist,ipfs@$PWD/packages/ipfs/dist
         - name: ipfs-http-client name api
           repo: https://github.com/ipfs-examples/js-ipfs-http-client-name-api.git
-          deps: ipfs@$PWD/packages/ipfs/dist,ipfs-http-client@$PWD/packages/ipfs-http-client/dist
+          deps: ipfs-http-client@$PWD/packages/ipfs-http-client/dist
         - name: ipfs-http-client upload file
           repo: https://github.com/ipfs-examples/js-ipfs-http-client-upload-file.git
           deps: ipfs@$PWD/packages/ipfs/dist,ipfs-http-client@$PWD/packages/ipfs-http-client/dist

packages/interface-ipfs-core/src/files/write.js (+44 -0)

@@ -14,6 +14,7 @@ import { randomBytes, randomStream } from 'iso-random-stream'
 import all from 'it-all'
 import isShardAtPath from '../utils/is-shard-at-path.js'
 import * as raw from 'multiformats/codecs/raw'
+import map from 'it-map'

 /**
  * @typedef {import('ipfsd-ctl').Factory} Factory
@@ -903,6 +904,49 @@ export function testWrite (factory, options) {
         long: true
       }))).to.eventually.not.be.empty()
     })
+
+    it('writes a file to a sub-shard of a shard that contains another sub-shard', async () => {
+      const data = Uint8Array.from([0, 1, 2])
+
+      await ipfs.files.mkdir('/hamttest-mfs')
+
+      const files = [
+        'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1398.txt',
+        'vivanov-sliceart',
+        'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1230.txt',
+        'methodify',
+        'fis-msprd-style-loader_0_13_1',
+        'js-form',
+        'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1181.txt',
+        'node-gr',
+        'yanvoidmodule',
+        'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1899.txt',
+        'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-372.txt',
+        'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1032.txt',
+        'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1293.txt',
+        'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-766.txt'
+      ]
+
+      for (const path of files) {
+        await ipfs.files.write(`/hamttest-mfs/${path}`, data, {
+          shardSplitThreshold: 0,
+          create: true
+        })
+      }
+
+      const beforeFiles = await all(map(ipfs.files.ls('/hamttest-mfs'), (entry) => entry.name))
+
+      expect(beforeFiles).to.have.lengthOf(files.length)
+
+      await ipfs.files.write('/hamttest-mfs/supermodule_test', data, {
+        shardSplitThreshold: 0,
+        create: true
+      })
+
+      const afterFiles = await all(map(ipfs.files.ls('/hamttest-mfs'), (entry) => entry.name))
+
+      expect(afterFiles).to.have.lengthOf(beforeFiles.length + 1)
+    })
   })
 })
}
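The new case asserts only on entry counts before and after the write. Other tests in this file also verify the directory really is a HAMT shard; a hedged sketch of that extra assertion, assuming the (path, ipfs) signature of the isShardAtPath helper imported at the top of the file:

// Assumed usage: isShardAtPath(path, ipfs) resolves to true when the MFS
// directory at `path` is backed by a hamt-sharded-directory node.
await expect(isShardAtPath('/hamttest-mfs', ipfs)).to.eventually.be.true()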
(new file) (+41 -0)

@@ -0,0 +1,41 @@
+import { UnixFS } from 'ipfs-unixfs'
+
+/**
+ * @param {string} path
+ * @param {import('ipfs-core-types').IPFS} ipfs
+ */
+export default async function dumpShard (path, ipfs) {
+  const stats = await ipfs.files.stat(path)
+  const { value: node } = await ipfs.dag.get(stats.cid)
+  const entry = UnixFS.unmarshal(node.Data)
+
+  if (entry.type !== 'hamt-sharded-directory') {
+    throw new Error('Not a shard')
+  }
+
+  await dumpSubShard(stats.cid, ipfs)
+}
+
+/**
+ * @param {import('multiformats/cid').CID} cid
+ * @param {import('ipfs-core-types').IPFS} ipfs
+ * @param {string} prefix
+ */
+async function dumpSubShard (cid, ipfs, prefix = '') {
+  const { value: node } = await ipfs.dag.get(cid)
+  const entry = UnixFS.unmarshal(node.Data)
+
+  if (entry.type !== 'hamt-sharded-directory') {
+    throw new Error('Not a shard')
+  }
+
+  for (const link of node.Links) {
+    const { value: subNode } = await ipfs.dag.get(link.Hash)
+    const subEntry = UnixFS.unmarshal(subNode.Data)
+    console.info(`${prefix}${link.Name}`, ' ', subEntry.type) // eslint-disable-line no-console
+
+    if (link.Name.length === 2) {
+      await dumpSubShard(link.Hash, ipfs, `${prefix}  `)
+    }
+  }
+}
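This new helper is a debugging aid: it walks a sharded MFS directory and prints each link name and its UnixFS type, indenting one level per subshard. A hypothetical usage sketch (the helper's file path is not shown in this capture, so the import specifier is assumed, and an `ipfs` instance is taken to be in scope):

// Assumed import path for the new helper added by this commit
import dumpShard from './dump-shard.js'

// Prints the shard layout: subshard links (two-character names) are followed
// by their children at increasing indentation.
await dumpShard('/hamttest-mfs', ipfs)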

packages/ipfs-core/src/components/files/utils/add-link.js (+2 -2)

@@ -339,7 +339,7 @@ const addFileToShardedDirectory = async (context, options) => {
     // subshard hasn't been loaded, descend to the next level of the HAMT
     if (!path[index]) {
       log(`Loaded new subshard ${segment.prefix}`)
-      await recreateHamtLevel(subShard.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16))
+      await recreateHamtLevel(context, subShard.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16))

       const position = await rootBucket._findNewBucketAndPos(file.name)

@@ -355,7 +355,7 @@ const addFileToShardedDirectory = async (context, options) => {
     const nextSegment = path[index]

     // add next levels worth of links to bucket
-    await addLinksToHamtBucket(subShard.Links, nextSegment.bucket, rootBucket)
+    await addLinksToHamtBucket(context, subShard.Links, nextSegment.bucket, rootBucket)

     nextSegment.node = subShard
   }

packages/ipfs-core/src/components/files/utils/hamt-utils.js (+39 -9)

@@ -72,20 +72,21 @@ export const updateHamtDirectory = async (context, links, bucket, options) => {
 }

 /**
+ * @param {MfsContext} context
  * @param {PBLink[]} links
  * @param {Bucket<any>} rootBucket
  * @param {Bucket<any>} parentBucket
  * @param {number} positionAtParent
  */
-export const recreateHamtLevel = async (links, rootBucket, parentBucket, positionAtParent) => {
+export const recreateHamtLevel = async (context, links, rootBucket, parentBucket, positionAtParent) => {
   // recreate this level of the HAMT
   const bucket = new Bucket({
     hash: rootBucket._options.hash,
     bits: rootBucket._options.bits
   }, parentBucket, positionAtParent)
   parentBucket._putObjectAt(positionAtParent, bucket)

-  await addLinksToHamtBucket(links, bucket, rootBucket)
+  await addLinksToHamtBucket(context, links, bucket, rootBucket)

   return bucket
 }
@@ -99,28 +100,57 @@ export const recreateInitialHamtLevel = async (links) => {
     bits: hamtBucketBits
   })

-  await addLinksToHamtBucket(links, bucket, bucket)
+  // populate sub bucket but do not recurse as we do not want to pull whole shard in
+  await Promise.all(
+    links.map(async link => {
+      const linkName = (link.Name || '')
+
+      if (linkName.length === 2) {
+        const pos = parseInt(linkName, 16)
+
+        const subBucket = new Bucket({
+          hash: bucket._options.hash,
+          bits: bucket._options.bits
+        }, bucket, pos)
+        bucket._putObjectAt(pos, subBucket)
+
+        return Promise.resolve()
+      }
+
+      return bucket.put(linkName.substring(2), {
+        size: link.Tsize,
+        cid: link.Hash
+      })
+    })
+  )

   return bucket
 }

 /**
+ * @param {MfsContext} context
  * @param {PBLink[]} links
  * @param {Bucket<any>} bucket
  * @param {Bucket<any>} rootBucket
  */
-export const addLinksToHamtBucket = async (links, bucket, rootBucket) => {
+export const addLinksToHamtBucket = async (context, links, bucket, rootBucket) => {
   await Promise.all(
-    links.map(link => {
+    links.map(async link => {
      const linkName = (link.Name || '')

      if (linkName.length === 2) {
+        log('Populating sub bucket', linkName)
        const pos = parseInt(linkName, 16)
+        const block = await context.repo.blocks.get(link.Hash)
+        const node = dagPB.decode(block)

-        bucket._putObjectAt(pos, new Bucket({
+        const subBucket = new Bucket({
          hash: rootBucket._options.hash,
          bits: rootBucket._options.bits
-        }, bucket, pos))
+        }, bucket, pos)
+        bucket._putObjectAt(pos, subBucket)
+
+        await addLinksToHamtBucket(context, node.Links, subBucket, rootBucket)

        return Promise.resolve()
      }
@@ -213,7 +243,7 @@ export const generatePath = async (context, fileName, rootNode) => {
     if (!path[i + 1]) {
       log(`Loaded new subshard ${segment.prefix}`)

-      await recreateHamtLevel(node.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16))
+      await recreateHamtLevel(context, node.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16))
       const position = await rootBucket._findNewBucketAndPos(fileName)

       // i--
@@ -229,7 +259,7 @@ export const generatePath = async (context, fileName, rootNode) => {
     const nextSegment = path[i + 1]

     // add intermediate links to bucket
-    await addLinksToHamtBucket(node.Links, nextSegment.bucket, rootBucket)
+    await addLinksToHamtBucket(context, node.Links, nextSegment.bucket, rootBucket)

     nextSegment.node = node
   }
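The `linkName.length === 2` test relies on the UnixFS HAMT layout: a link whose name is exactly two characters points at a subshard, and those two characters are the hex-encoded bucket position, while a file link carries that two-character prefix followed by the file name. A small illustrative sketch of the convention (the link names here are made up):

// Illustrative only: decoding the two-character hex prefix used by HAMT links
const links = [
  { Name: '3F' },                 // exactly two chars: a subshard at bucket 0x3F
  { Name: 'B0supermodule_test' }  // prefix + file name: a file at bucket 0xB0
]

for (const link of links) {
  const pos = parseInt(link.Name.substring(0, 2), 16)

  if (link.Name.length === 2) {
    console.log(`subshard at position ${pos}`) // 63
  } else {
    console.log(`file '${link.Name.substring(2)}' at position ${pos}`) // 176
  }
}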

packages/ipfs-core/src/components/files/write.js (+1 -12)

@@ -3,7 +3,6 @@ import { importer } from 'ipfs-unixfs-importer'
 import {
   decode
 } from '@ipld/dag-pb'
-import { sha256, sha512 } from 'multiformats/hashes/sha2'
 import { createStat } from './stat.js'
 import { createMkdir } from './mkdir.js'
 import { addLink } from './utils/add-link.js'
@@ -293,17 +292,7 @@ const write = async (context, source, destination, options) => {
     mtime = destination.unixfs.mtime
   }

-  let hasher
-  switch (options.hashAlg) {
-    case 'sha2-256':
-      hasher = sha256
-      break
-    case 'sha2-512':
-      hasher = sha512
-      break
-    default:
-      throw new Error(`TODO vmx 2021-03-31: Proper error message for unsupported hash algorithms like ${options.hashAlg}`)
-  }
+  const hasher = await context.hashers.getHasher(options.hashAlg)

   const result = await last(importer([{
     content: content,
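The hard-coded switch is replaced by a lookup on context.hashers, so any registered multihasher works rather than only the two SHA-2 variants. A minimal sketch of the kind of name-keyed registry this implies (the Hashers class below is illustrative, not the real ipfs-core-utils implementation):

import { sha256, sha512 } from 'multiformats/hashes/sha2'

// Illustrative registry: resolves a multihash implementation by its name
class Hashers {
  constructor (hashers) {
    this._hashers = new Map(hashers.map(h => [h.name, h]))
  }

  async getHasher (name) {
    const hasher = this._hashers.get(name)

    if (!hasher) {
      throw new Error(`Unsupported hash algorithm: ${name}`)
    }

    return hasher
  }
}

const hashers = new Hashers([sha256, sha512])
const hasher = await hashers.getHasher('sha2-256') // replaces the old switch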
