Skip to content

Commit

Permalink
fix: parse tags and attributes according to the spec (#243)
Browse files Browse the repository at this point in the history
  • Loading branch information
evilebottnawi committed Mar 3, 2020
1 parent 3df909d commit 24b0427
Show file tree
Hide file tree
Showing 8 changed files with 444 additions and 50 deletions.
75 changes: 52 additions & 23 deletions src/plugins/attribute-plugin.js
Expand Up @@ -426,6 +426,52 @@ function processMatch(match, strUntilValue, name, value, index) {
});
}

// Regular-expression *source strings* (not RegExp objects) used as keys of the
// parser description in getParser(). They are plain strings so they can be
// composed with template literals.

// Tag names consist of ASCII alphanumerics.
// https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name
const validTagName = '[A-Za-z0-9]+';

// One "PCENChar" of a custom element name: either a BMP character from the
// allowed ranges, or a surrogate pair encoding U+10000..U+EFFFF.
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
const pcenChar =
  '[-.0-9_a-z\xB7\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|[\uD800-\uDB7F][\uDC00-\uDFFF]';

// Custom element name: [a-z] (PCENChar)* '-' (PCENChar)*
const validCustomElementName = `[a-z](?:${pcenChar})*-(?:${pcenChar})*`;

// Opening of a tag that has at least one whitespace character after its name
// (i.e. a tag that may carry attributes). Capture group 1 is the tag name.
// The custom element alternative comes first so that `flag-icon` is not cut
// short by the plain tag-name alternative.
const matchTagRegExp = `<((?:${validCustomElementName})|(?:${validTagName}))\\s+`;

// Attribute names are one or more characters other than controls, space, ",
// ', >, /, = and noncharacters.
// https://html.spec.whatwg.org/multipage/syntax.html#attributes-2
//
// "Controls" are the C0 controls (U+0000..U+001F) as well as U+007F..U+009F.
// Excluding the C0 range keeps tabs and line breaks out of the captured
// attribute name (e.g. `src\n="x"` yields the name `src`, not `src\n`).
// Written as regex-level escapes so the pattern source stays printable.
const controls = '\\u0000-\\u001F\\u007F-\\u009F';
const invalid = ' "\'>/=';
// Noncharacters in the BMP plus all surrogates (which also rules out astral
// characters in attribute names).
const noncharacter = '\uFDD0-\uFDEF\uFFFE\uFFFF\uD800-\uDFFF';
const validAttribute = `[^${controls}${invalid}${noncharacter}]+`;

// Attribute with a value. In each pattern: group 1 is everything up to the
// value (name, `=`, surrounding whitespace and the opening quote if any),
// group 2 is the attribute name and group 3 is the raw value.
// 1: double-quoted value, 2: single-quoted value, 3: unquoted value.
const validAttributeRegExp1 = `((${validAttribute})\\s*=\\s*")([^"]*)"`;
const validAttributeRegExp2 = `((${validAttribute})\\s*=\\s*')([^']*)'`;
const validAttributeRegExp3 = `((${validAttribute})\\s*=\\s*)([^\\s>]+)`;

/**
 * Builds the two-state ("outside" / "inside" a tag) parser used to locate
 * attribute values in an HTML string.
 *
 * Each state maps a regular-expression source string to an action: `true`,
 * the name of the state to switch to, or a handler function.
 * NOTE(review): `Parser` and `processMatch` are defined outside this block;
 * the exact meaning of `true` (presumably "consume and stay in this state")
 * is determined by `Parser` — confirm against its documentation.
 *
 * Key order is significant only insofar as `Parser` honours it, so the
 * original insertion order is preserved.
 *
 * @returns {Parser} A parser with `outside` and `inside` states.
 */
function getParser() {
  const outside = {
    // Comments and CDATA sections may span several lines and `.` does not
    // match line terminators, hence `[\s\S]`.
    '<!--[\\s\\S]*?-->': true,
    // The brackets must be escaped: unescaped, `[CDATA[.*?]` is read as a
    // character class and the pattern never matches a real CDATA section.
    '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>': true,
    // Doctype, processing instructions and similar declarations.
    '<[!\\?].*?>': true,
    // Closing tags carry no attributes.
    '</[^>]+>': true,
    // Opening tag followed by whitespace: remember its name on the parsing
    // context (`this`) and start reading attributes.
    [matchTagRegExp]: function matchTag(match, tagName) {
      this.currentTag = tagName;

      return 'inside';
    },
  };

  const inside = {
    // eat up whitespace
    '\\s+': true,
    // end of attributes
    '>': 'outside',
    // double-quoted, single-quoted and unquoted attribute values
    [validAttributeRegExp1]: processMatch,
    [validAttributeRegExp2]: processMatch,
    [validAttributeRegExp3]: processMatch,
  };

  return new Parser({ outside, inside });
}

export default (options) =>
function process(html, result) {
const tagsAndAttributes =
Expand All @@ -444,36 +490,19 @@ export default (options) =>
]
: options.attributes;

const parser = new Parser({
outside: {
'<!--.*?-->': true,
'<![CDATA[.*?]]>': true,
'<[!\\?].*?>': true,
'</[^>]+>': true,
'<([a-zA-Z\\-:]+)\\s*': function matchTag(match, tagName) {
this.currentTag = tagName;

return 'inside';
},
},
inside: {
// eat up whitespace
'\\s+': true,
// end of attributes
'>': 'outside',
'(([0-9a-zA-Z\\-:]+)\\s*=\\s*")([^"]*)"': processMatch,
"(([0-9a-zA-Z\\-:]+)\\s*=\\s*')([^']*)'": processMatch,
'(([0-9a-zA-Z\\-:]+)\\s*=\\s*)([^\\s>]+)': processMatch,
},
});

const parser = getParser();
const sources = parser.parse('outside', html, {
currentTag: null,
results: [],
filter: (value) => {
return isUrlRequest(value, options.root);
},
isRelevantTagAttribute: (tag, attribute) => {
// eslint-disable-next-line no-param-reassign
tag = tag.trim();
// eslint-disable-next-line no-param-reassign
attribute = attribute.trim();

return tagsAndAttributes.some((item) => {
const pattern = new RegExp(`^${item}$`, 'i');

Expand Down
196 changes: 186 additions & 10 deletions test/__snapshots__/attributes-option.test.js.snap

Large diffs are not rendered by default.

54 changes: 51 additions & 3 deletions test/__snapshots__/esModule-option.test.js.snap

Large diffs are not rendered by default.

18 changes: 17 additions & 1 deletion test/__snapshots__/loader.test.js.snap

Large diffs are not rendered by default.

66 changes: 57 additions & 9 deletions test/__snapshots__/minimize-option.test.js.snap

Large diffs are not rendered by default.

36 changes: 34 additions & 2 deletions test/__snapshots__/root-option.test.js.snap

Large diffs are not rendered by default.

33 changes: 31 additions & 2 deletions test/attributes-option.test.js
Expand Up @@ -164,7 +164,22 @@ describe("'attributes' option", () => {
});

it('should work with an "array" notations', async () => {
const compiler = getCompiler('simple.js', { attributes: ['img:src'] });
const compiler = getCompiler('simple.js', {
attributes: [
'img:src',
'flag-icon:src',
'MyStrangeTag13:src',
'a-:src',
'a-.:src',
'a--:src',
'aÀ-豈:src',
'aÀ-Ⰰ:src',
// Should not work
'INVALID_TAG_NAME:src',
// Should not work
'invalid-CUSTOM-TAG:src',
],
});
const stats = await compile(compiler);

expect(getModuleSource('./simple.html', stats)).toMatchSnapshot('module');
Expand All @@ -177,7 +192,21 @@ describe("'attributes' option", () => {

it('should work with multiple an "array" notations', async () => {
const compiler = getCompiler('simple.js', {
attributes: ['img:src', 'script:src'],
attributes: [
'img:src',
'script:src',
'flag-icon:src',
'MyStrangeTag13:src',
'a-:src',
'a-.:src',
'a--:src',
'aÀ-豈:src',
'aÀ-Ⰰ:src',
// Should not work
'INVALID_TAG_NAME:src',
// Should not work
'invalid-CUSTOM-TAG:src',
],
});
const stats = await compile(compiler);

Expand Down
16 changes: 16 additions & 0 deletions test/fixtures/simple.html
Expand Up @@ -197,3 +197,19 @@ <h2>An Ordered HTML List</h2>

<img srcset="#test" src="#test" alt="Elva dressed as a fairy">
<img src="image%20image.png" alt="Test">

<flag-icon src="image.png"></flag-icon>
<MyStrangeTag13 src="image.png" alt="Smiley face" />
<MyStrangeTag13 src="image.png" alt="Smiley face"> Test </MyStrangeTag13>
<a- src="image.png"></a->
<a-. src="image.png">test</a-.>
<a-- src="image.png">test</a-->
<aÀ-豈 src="image.png">test</aÀ-豈>
<aÀ-Ⰰ src="image.png" />

<INVALID_TAG_NAME src="image.png" />
<invalid-CUSTOM-TAG src="image.png" />

<p>Text</p>
<p >Text</p>
<p >Text</p>

0 comments on commit 24b0427

Please sign in to comment.