Skip to content

Commit

Permalink
Merge pull request #2 from manidlou/add-filter-opt
Browse files Browse the repository at this point in the history
remove ignore option, add filter option
  • Loading branch information
manidlou committed Apr 23, 2017
2 parents ab77725 + 4323677 commit 3fc387f
Show file tree
Hide file tree
Showing 6 changed files with 290 additions and 266 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,13 @@
2.0.0 / 2017-04-23
------------------

### Removed
- **BREAKING:** Removed support for the `ignore` option due to inconsistent glob pattern usage and relatively poor performance. See: [#1]

### Added
- `filter` option. A function that gets one argument `fn({path: '', stats: {}})` and returns true to include or false to exclude the item.
- `noRecurseOnFailedFilter` option to prevent unnecessary traversal of unwanted directories when `filter` function is used.

1.1.2 / 2017-02-17
------------------

Expand All @@ -19,3 +29,5 @@

- initial release

[#1]: https://github.com/manidlou/node-klaw-sync/issues/1 "loading all files with certain name"

96 changes: 63 additions & 33 deletions README.md
@@ -1,5 +1,5 @@
klaw-sync
=========
node-klaw-sync
==============

[![npm Package](https://img.shields.io/npm/v/klaw-sync.svg?style=flat-square)](https://www.npmjs.com/package/klaw-sync)
[![Build Status](https://travis-ci.org/manidlou/node-klaw-sync.svg?branch=master)](https://travis-ci.org/manidlou/node-klaw-sync)
Expand All @@ -19,29 +19,35 @@ Usage

### klawSync(directory[, options])

- `directory` `{String}`
- `options` `{Object}` *optional* (all options are `false` by default)
- `ignore` `{String | Array<String>}` any paths or [micromatch](https://github.com/jonschlinkert/micromatch#features) patterns to ignore (can be string or an array of strings)
- `nodir` `{Boolean}` return only files (ignore directories)
- `nofile` `{Boolean}` return only directories (ignore files)
- `directory` `<String>`
- `options` `<Object>` (optional) _all options are `false` by default_
- `nodir` `<Boolean>`
- return only files (ignore directories)
- `nofile` `<Boolean>`
- return only directories (ignore files)
- `noRecurseOnFailedFilter` `<Boolean>`
- when a `filter` function is used, the default behavior is to read all directories even if they don't pass the `filter` function (they won't be included, but they will still be traversed). If set to `true`, directories that don't pass the `filter` function are neither included nor traversed
- `filter` `<Function>`
- function that gets one argument `fn({path: '', stats: {}})` and returns true to include or false to exclude the item

- return: `{Array<Object>}` `[{path: '', stats: {}}]`
- **Return:** `<Array<Object>>` `[{path: '', stats: {}}]`

Examples
--------

```js
var klawSync = require('klaw-sync')
var paths = klawSync('/some/dir')
const klawSync = require('klaw-sync')

const paths = klawSync('/some/dir')
// paths = [{path: '/some/dir/dir1', stats: {}}, {path: '/some/dir/file1', stats: {}}]
```

_**catch error**_

```js
var klawSync = require('klaw-sync')
const klawSync = require('klaw-sync')

var paths
let paths
try {
paths = klawSync('/some/dir')
} catch (er) {
Expand All @@ -53,38 +59,67 @@ console.dir(paths)
_**files only**_

```js
var klawSync = require('klaw-sync')
var files = klawSync('/some/dir', {nodir: true})
const klawSync = require('klaw-sync')

const files = klawSync('/some/dir', {nodir: true})
// files = [{path: '/some/dir/file1', stats: {}}, {path: '/some/dir/file2', stats: {}}]
```

_**directories only**_

```js
var klawSync = require('klaw-sync')
var dirs = klawSync('/some/dir', {nofile: true})
const klawSync = require('klaw-sync')

const dirs = klawSync('/some/dir', {nofile: true})
// dirs = [{path: '/some/dir/dir1', stats: {}}, {path: '/some/dir/dir2', stats: {}}]
```

_**ignore `node_modules`**_

Note that the `noRecurseOnFailedFilter: true` option is used here, since we don't want anything from `node_modules` (neither inclusion nor traversal).

```js
var klawSync = require('klaw-sync')
var paths = klawSync('/some/dir', {ignore: 'node_modules'})
const klawSync = require('klaw-sync')

const filterFn = item => item.path.indexOf('node_modules') < 0

const paths = klawSync('/some/dir', { filter: filterFn, noRecurseOnFailedFilter: true })
```

_**ignore `node_modules` and `.git` using [micromatch](https://github.com/jonschlinkert/micromatch#features) patterns**_
_**ignore `node_modules` and `.git`**_

```js
var klawSync = require('klaw-sync')
var paths = klawSync('/some/dir', {ignore: '{node_modules,.git}'})
const klawSync = require('klaw-sync')

const filterFn = item => item.path.indexOf('node_modules') < 0 && item.path.indexOf('.git') < 0

const paths = klawSync('/some/dir', { filter: filterFn, noRecurseOnFailedFilter: true })
```

_**ignore `node_modules`, `.git` and all `*.js` files using [micromatch](https://github.com/jonschlinkert/micromatch#features) patterns**_
_**get all `js` files**_

Here the `noRecurseOnFailedFilter` option should not be set, since we are interested in all `js` files. In other words, although directories generally do not pass the `filter` function, we still want to traverse them to see whether they contain any `js` files.

```js
var klawSync = require('klaw-sync')
var paths = klawSync('/some/dir', {ignore: ['{node_modules,.git}', '*.js']})
const path = require('path')
const klawSync = require('klaw-sync')

const filterFn = item => path.extname(item.path) === '.js'

const paths = klawSync('/some/dir', { filter: filterFn })
```

_**filter based on stats**_

Again, the `noRecurseOnFailedFilter` option should not be set here, since we still want to traverse all directories, even those that don't pass the `filter` function, to see whether their contents pass it.

```js
const klawSync = require('klaw-sync')

const refTime = new Date(2017, 3, 24).getTime()
const filterFn = item => item.stats.mtime.getTime() > refTime

const paths = klawSync('/some/dir', { filter: filterFn })
```

Run tests
Expand All @@ -100,24 +135,19 @@ lint & unit: `npm test`
Performance compared to other similar modules
-----------------------------------------------

The `bm.js` runs some basic [benchmark](https://github.com/bestiejs/benchmark.js) tests for two cases, `without --ignore` (basic usage) and `with --ignore`, on these modules:
The `bm.js` runs some basic [benchmark](https://github.com/bestiejs/benchmark.js) tests for two cases: basic usage and with `--nodir=true` (get files only), on these modules:

- `klaw-sync`
- [walk-sync](https://github.com/joliss/node-walk-sync)
- [glob.sync](https://github.com/isaacs/node-glob#globsyncpattern-options)

Just for fun, it turned out (as of January 25, 2017) for the most cases `klaw-sync` is faster than other modules!

#####run benchmark
It turned out (as of January 25, 2017) that in most cases `klaw-sync` is faster than the other modules!

To run benchmark, just specify the root `--dir=`. To ignore paths or patterns, use `-i` flag.
##### run benchmark

`npm run benchmark -- --dir=/some/dir`

`npm run benchmark -- --dir=/some/dir -i "node_modules"`

`npm run benchmark -- --dir=/some/dir -i "node_modules" -i "*.js"`

`npm run benchmark -- --dir=/some/dir --nodir=true`

Credit
------
Expand Down
68 changes: 29 additions & 39 deletions benchmark/bm.js
Expand Up @@ -8,77 +8,67 @@ const klawSync = require('../klaw-sync.js')

function help () {
console.log(`Usage examples:\n`)
console.log(`npm run benchmark -- --dir=<rootdir> (basic usage without anything to ignore)`)
console.log(`npm run benchmark -- --dir=<rootdir> -i "{node_modules,.git}" (ignore node_modules and .git directories)`)
console.log(`npm run benchmark -- --dir=<rootdir> -i "node_modules" -i "*.js" (ignore node_modules and all js files)`)
console.log(`npm run benchmark -- --dir=<rootdir>`)
console.log(`npm run benchmark -- --dir=<rootdir> --nodir=true (ignore all directories)`)
}

function perf (root, ign) {
var suite = Benchmark.Suite()
if (ign) {
function runBm (root, opts) {
if (!opts) {
const suite = Benchmark.Suite()
suite.add('walk-sync', function () {
walkSync(root, {ignore: ign})
walkSync(root)
}).add('glob.sync', function () {
globSync('**', {
cwd: root,
dot: true,
mark: true,
strict: true,
ignore: ign
strict: true
})
}).add('klaw-sync', function () {
klawSync(root, {ignore: ign})
klawSync(root)
}).on('error', function (er) {
return er
}).on('cycle', function (ev) {
console.log(String(ev.target))
}).on('complete', function () {
console.log('\nSummary: Fastest is ' + this.filter('fastest').map('name'))
}).run({ 'async': false })
console.log('Fastest is ' + this.filter('fastest').map('name'))
}).run()
} else {
const suite = Benchmark.Suite()
suite.add('walk-sync', function () {
walkSync(root)
walkSync(root, {directories: false})
}).add('glob.sync', function () {
globSync('**', {
cwd: root,
dot: true,
mark: true,
strict: true
strict: true,
nodir: true
})
}).add('klaw-sync', function () {
klawSync(root)
klawSync(root, {nodir: true})
}).on('error', function (er) {
return er
}).on('cycle', function (ev) {
console.log(String(ev.target))
}).on('complete', function () {
console.log('\nSummary: Fastest is ' + this.filter('fastest').map('name'))
}).run({ 'async': false })
console.log('Fastest is ' + this.filter('fastest').map('name'))
}).run()
}
}

try {
if (!argv.dir) {
console.log('err: root dir must be specified.')
help()
process.exit(1)
}
var dir = path.resolve(argv.dir)
console.log('Running benchmark tests...\n')
console.log('root dir: ', argv.dir)
if (argv.i) {
process.stdout.write('ignore: ')
console.dir(argv.i)
console.log()
// convert ignore args to array
if (typeof argv.i === 'string') {
perf(dir, [argv.i])
} else {
perf(dir, argv.i)
}
if (!argv.dir) {
console.log('err: root dir cannot be null.')
help()
} else {
const dir = path.resolve(argv.dir)
console.log('Running benchmark tests..')
if (argv.nodir) {
console.log(`root dir: ${dir}`)
console.log('option.nodir: true\n')
runBm(dir, {nodir: true})
} else {
perf(dir)
console.log(`root dir: ${dir}\n`)
runBm(dir)
}
} catch (er) {
throw er
}
80 changes: 35 additions & 45 deletions klaw-sync.js
@@ -1,60 +1,50 @@
'use strict'
var path = require('path')
var mm = require('micromatch')
var fs
const path = require('path')
let fs
try {
fs = require('graceful-fs')
} catch (e) {
fs = require('fs')
}

function _procPath (dir, pathItem, opts, list) {
var nestedPath
var stat
// here since dir already resolved, we use string concatenation
// which showed faster performance than path.join() and path.resolve()
if (path.sep === '/') {
nestedPath = dir + '/' + pathItem
} else {
nestedPath = dir + '\\' + pathItem
}
stat = fs.lstatSync(nestedPath)
if (stat.isDirectory()) {
if (!opts.nodir) {
list.push({path: nestedPath, stats: stat})
}
list = walkSync(nestedPath, opts, list)
} else {
if (!opts.nofile) {
list.push({path: nestedPath, stats: stat})
function klawSync (dir, opts, ls) {
function procPath (pathItem) {
const stat = fs.lstatSync(pathItem)
const item = {path: pathItem, stats: stat}
if (stat.isDirectory()) {
if (opts.filter) {
if (opts.filter(item)) {
ls.push(item)
ls = klawSync(pathItem, opts, ls)
} else {
if (!opts.noRecurseOnFailedFilter) ls = klawSync(pathItem, opts, ls)
}
} else {
if (!opts.nodir) ls.push(item)
ls = klawSync(pathItem, opts, ls)
}
} else {
if (opts.filter) {
if (opts.filter(item)) ls.push(item)
} else {
if (!opts.nofile) ls.push(item)
}
}
}
}

function walkSync (dir, opts, list) {
var files
var ignore = []
opts = opts || {}
list = list || []
ls = ls || []
dir = path.resolve(dir)
try {
files = fs.readdirSync(dir)
if (opts.ignore) {
ignore = mm(files, opts.ignore)
}
} catch (er) {
throw er
}

for (var i = 0; i < files.length; i += 1) {
var file = files[i]
if (ignore.length > 0) {
if (ignore.indexOf(file) === -1) _procPath(dir, file, opts, list)
} else {
_procPath(dir, file, opts, list)
}
const files = fs.readdirSync(dir)
for (let i = 0; i < files.length; i += 1) {
// here dir already resolved, we use string concatenation since
// showed better performance than path.join() and path.resolve()
let pathItem
if (path.sep === '/') pathItem = dir + '/' + files[i]
else pathItem = dir + '\\' + files[i]
procPath(pathItem)
}
return list
return ls
}

module.exports = walkSync
module.exports = klawSync
5 changes: 1 addition & 4 deletions package.json
Expand Up @@ -21,11 +21,8 @@
"url": "https://github.com/manidlou/node-klaw-sync/issues"
},
"homepage": "https://github.com/manidlou/node-klaw-sync#readme",
"dependencies": {
"micromatch": "^2.3.11"
},
"devDependencies": {
"benchmark": "^2.1.3",
"benchmark": "^2.1.4",
"fs-extra": "^1.0.0",
"glob": "^7.1.1",
"minimist": "^1.2.0",
Expand Down

0 comments on commit 3fc387f

Please sign in to comment.