Skip to content

Commit

Permalink
Ability to pass custom formatting via the format option (#128)
Browse files Browse the repository at this point in the history
* Ability to pass custom formatting via the `format` option

* Update README with custom format doc & example
  • Loading branch information
meriadec authored and Malte Legenhausen committed May 23, 2017
1 parent 8782c3d commit 2ac2830
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 1 deletion.
22 changes: 22 additions & 0 deletions README.md
Expand Up @@ -71,6 +71,28 @@ You can configure the behaviour of html-to-text with the following options:
* `longWordSplit` describes how to wrap long words, has the following parameters:
* `wrapCharacters` is an array containing the characters that may be wrapped on, these are used in order
* `forceWrapOnLimit` defines whether to break long words on the limit if `true`.
* `format` pass an object to enable custom formatting for specific elements (see below)

### Override formatting for specific elements

By using the `format` option, you can specify formatting for these elements: `text`, `image`, `lineBreak`, `paragraph`, `anchor`, `heading`, `table`, `orderedList`, `unorderedList`, `listItem`, `horizontalLine`.

Each key's value must be a function that receives `node` (the current node), `fn` (the next formatting function) and `options` (the options passed to html-to-text).

```js
var htmlToText = require('html-to-text');

// Override the formatter used for headings: format the heading's children
// with the next formatting function, then upper-case and frame the result.
var text = htmlToText.fromString('<h1>Hello World</h1>', {
  format: {
    // `node` is the current node, `fn` the next formatting function and
    // `options` the options passed to html-to-text.
    heading: function (node, fn, options) {
      var h = fn(node.children, options);
      return '====\n' + h.toUpperCase() + '\n====';
    }
  }
});

console.log(text);
```

## Command Line Interface

Expand Down
5 changes: 4 additions & 1 deletion lib/html-to-text.js
Expand Up @@ -6,7 +6,7 @@ var _s = require('underscore.string');
var htmlparser = require('htmlparser2');

var helper = require('./helper');
var format = require('./formatter');
var defaultFormat = require('./formatter');

// Which type of tags should not be parsed
var SKIP_TYPES = [
Expand All @@ -27,6 +27,7 @@ function htmlToText(html, options) {
noLinkBrackets: false,
baseElement: 'body',
returnDomByDefault: true,
format: {},
decodeOptions: {
isAttributeValue: false,
strict: false
Expand Down Expand Up @@ -107,6 +108,8 @@ function walk(dom, options, result) {
result = '';
}
var whiteSpaceRegex = /\s$/;
var format = _.assign({}, defaultFormat, options.format);

_.each(dom, function(elem) {
switch(elem.type) {
case 'tag':
Expand Down
14 changes: 14 additions & 0 deletions test/html-to-text.js
Expand Up @@ -316,6 +316,20 @@ describe('html-to-text', function() {
});
});

describe('custom formatting', function () {
  it('should allow to pass custom formatting functions', function () {
    // Custom heading formatter: formats the element's children with the
    // supplied function, lower-cases the text and frames it with '====' lines.
    var lowerCaseHeading = function (elem, fn, options) {
      var inner = fn(elem.children, options);
      return '====\n' + inner.toLowerCase() + '\n====';
    };
    var customOptions = { format: { heading: lowerCaseHeading } };

    var result = htmlToText.fromString('<h1>TeSt</h1>', customOptions);

    expect(result).to.equal('====\ntest\n====');
  });
});

describe('Base element', function () {
it('should retrieve and convert the entire document under `body` by default', function(done) {
var htmlFile = path.join(__dirname, 'test.html');
Expand Down

0 comments on commit 2ac2830

Please sign in to comment.