Skip to content

Commit f848e77

Browse files
authored Feb 7, 2021
fix: Join adjacent inlineText tokens (#1926)
1 parent f2535f1 commit f848e77

File tree

6 files changed

+112
-107
lines changed

6 files changed

+112
-107
lines changed
 

‎docs/USING_PRO.md

+2 -2
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@ console.log(marked('$ latex code $\n\n` other code `'));
157157
### Block level tokenizer methods
158158

159159
- space(*string* src)
160-
- code(*string* src, *array* tokens)
160+
- code(*string* src)
161161
- fences(*string* src)
162162
- heading(*string* src)
163163
- nptable(*string* src)
@@ -169,7 +169,7 @@ console.log(marked('$ latex code $\n\n` other code `'));
169169
- table(*string* src)
170170
- lheading(*string* src)
171171
- paragraph(*string* src)
172-
- text(*string* src, *array* tokens)
172+
- text(*string* src)
173173

174174
### Inline level tokenizer methods
175175

‎lib/marked.esm.js

+35 -33
Original file line number | Diff line number | Diff line change
@@ -391,18 +391,9 @@ var Tokenizer_1 = class Tokenizer {
391391
}
392392
}
393393

394-
code(src, tokens) {
394+
code(src) {
395395
const cap = this.rules.block.code.exec(src);
396396
if (cap) {
397-
const lastToken = tokens[tokens.length - 1];
398-
// An indented code block cannot interrupt a paragraph.
399-
if (lastToken && lastToken.type === 'paragraph') {
400-
return {
401-
raw: cap[0],
402-
text: cap[0].trimRight()
403-
};
404-
}
405-
406397
const text = cap[0].replace(/^ {1,4}/gm, '');
407398
return {
408399
type: 'code',
@@ -722,17 +713,9 @@ var Tokenizer_1 = class Tokenizer {
722713
}
723714
}
724715

725-
text(src, tokens) {
716+
text(src) {
726717
const cap = this.rules.block.text.exec(src);
727718
if (cap) {
728-
const lastToken = tokens[tokens.length - 1];
729-
if (lastToken && lastToken.type === 'text') {
730-
return {
731-
raw: cap[0],
732-
text: cap[0]
733-
};
734-
}
735-
736719
return {
737720
type: 'text',
738721
raw: cap[0],
@@ -1505,14 +1488,15 @@ var Lexer_1 = class Lexer {
15051488
}
15061489

15071490
// code
1508-
if (token = this.tokenizer.code(src, tokens)) {
1491+
if (token = this.tokenizer.code(src)) {
15091492
src = src.substring(token.raw.length);
1510-
if (token.type) {
1511-
tokens.push(token);
1512-
} else {
1513-
lastToken = tokens[tokens.length - 1];
1493+
lastToken = tokens[tokens.length - 1];
1494+
// An indented code block cannot interrupt a paragraph.
1495+
if (lastToken && lastToken.type === 'paragraph') {
15141496
lastToken.raw += '\n' + token.raw;
15151497
lastToken.text += '\n' + token.text;
1498+
} else {
1499+
tokens.push(token);
15161500
}
15171501
continue;
15181502
}
@@ -1605,14 +1589,14 @@ var Lexer_1 = class Lexer {
16051589
}
16061590

16071591
// text
1608-
if (token = this.tokenizer.text(src, tokens)) {
1592+
if (token = this.tokenizer.text(src)) {
16091593
src = src.substring(token.raw.length);
1610-
if (token.type) {
1611-
tokens.push(token);
1612-
} else {
1613-
lastToken = tokens[tokens.length - 1];
1594+
lastToken = tokens[tokens.length - 1];
1595+
if (lastToken && lastToken.type === 'text') {
16141596
lastToken.raw += '\n' + token.raw;
16151597
lastToken.text += '\n' + token.text;
1598+
} else {
1599+
tokens.push(token);
16161600
}
16171601
continue;
16181602
}
@@ -1697,7 +1681,7 @@ var Lexer_1 = class Lexer {
16971681
* Lexing/Compiling
16981682
*/
16991683
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
1700-
let token;
1684+
let token, lastToken;
17011685

17021686
// String with links masked to avoid interference with em and strong
17031687
let maskedSrc = src;
@@ -1737,7 +1721,13 @@ var Lexer_1 = class Lexer {
17371721
src = src.substring(token.raw.length);
17381722
inLink = token.inLink;
17391723
inRawBlock = token.inRawBlock;
1740-
tokens.push(token);
1724+
lastToken = tokens[tokens.length - 1];
1725+
if (lastToken && token.type === 'text' && lastToken.type === 'text') {
1726+
lastToken.raw += token.raw;
1727+
lastToken.text += token.text;
1728+
} else {
1729+
tokens.push(token);
1730+
}
17411731
continue;
17421732
}
17431733

@@ -1754,10 +1744,16 @@ var Lexer_1 = class Lexer {
17541744
// reflink, nolink
17551745
if (token = this.tokenizer.reflink(src, this.tokens.links)) {
17561746
src = src.substring(token.raw.length);
1747+
lastToken = tokens[tokens.length - 1];
17571748
if (token.type === 'link') {
17581749
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
1750+
tokens.push(token);
1751+
} else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
1752+
lastToken.raw += token.raw;
1753+
lastToken.text += token.text;
1754+
} else {
1755+
tokens.push(token);
17591756
}
1760-
tokens.push(token);
17611757
continue;
17621758
}
17631759

@@ -1818,7 +1814,13 @@ var Lexer_1 = class Lexer {
18181814
src = src.substring(token.raw.length);
18191815
prevChar = token.raw.slice(-1);
18201816
keepPrevChar = true;
1821-
tokens.push(token);
1817+
lastToken = tokens[tokens.length - 1];
1818+
if (lastToken && lastToken.type === 'text') {
1819+
lastToken.raw += token.raw;
1820+
lastToken.text += token.text;
1821+
} else {
1822+
tokens.push(token);
1823+
}
18221824
continue;
18231825
}
18241826

‎lib/marked.js

+38 -34
Original file line number | Diff line number | Diff line change
@@ -488,19 +488,10 @@
488488
}
489489
};
490490

491-
_proto.code = function code(src, tokens) {
491+
_proto.code = function code(src) {
492492
var cap = this.rules.block.code.exec(src);
493493

494494
if (cap) {
495-
var lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph.
496-
497-
if (lastToken && lastToken.type === 'paragraph') {
498-
return {
499-
raw: cap[0],
500-
text: cap[0].trimRight()
501-
};
502-
}
503-
504495
var text = cap[0].replace(/^ {1,4}/gm, '');
505496
return {
506497
type: 'code',
@@ -812,19 +803,10 @@
812803
}
813804
};
814805

815-
_proto.text = function text(src, tokens) {
806+
_proto.text = function text(src) {
816807
var cap = this.rules.block.text.exec(src);
817808

818809
if (cap) {
819-
var lastToken = tokens[tokens.length - 1];
820-
821-
if (lastToken && lastToken.type === 'text') {
822-
return {
823-
raw: cap[0],
824-
text: cap[0]
825-
};
826-
}
827-
828810
return {
829811
type: 'text',
830812
raw: cap[0],
@@ -1497,15 +1479,15 @@
14971479
} // code
14981480

14991481

1500-
if (token = this.tokenizer.code(src, tokens)) {
1482+
if (token = this.tokenizer.code(src)) {
15011483
src = src.substring(token.raw.length);
1484+
lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph.
15021485

1503-
if (token.type) {
1504-
tokens.push(token);
1505-
} else {
1506-
lastToken = tokens[tokens.length - 1];
1486+
if (lastToken && lastToken.type === 'paragraph') {
15071487
lastToken.raw += '\n' + token.raw;
15081488
lastToken.text += '\n' + token.text;
1489+
} else {
1490+
tokens.push(token);
15091491
}
15101492

15111493
continue;
@@ -1603,15 +1585,15 @@
16031585
} // text
16041586

16051587

1606-
if (token = this.tokenizer.text(src, tokens)) {
1588+
if (token = this.tokenizer.text(src)) {
16071589
src = src.substring(token.raw.length);
1590+
lastToken = tokens[tokens.length - 1];
16081591

1609-
if (token.type) {
1610-
tokens.push(token);
1611-
} else {
1612-
lastToken = tokens[tokens.length - 1];
1592+
if (lastToken && lastToken.type === 'text') {
16131593
lastToken.raw += '\n' + token.raw;
16141594
lastToken.text += '\n' + token.text;
1595+
} else {
1596+
tokens.push(token);
16151597
}
16161598

16171599
continue;
@@ -1718,7 +1700,7 @@
17181700
inRawBlock = false;
17191701
}
17201702

1721-
var token; // String with links masked to avoid interference with em and strong
1703+
var token, lastToken; // String with links masked to avoid interference with em and strong
17221704

17231705
var maskedSrc = src;
17241706
var match;
@@ -1759,7 +1741,15 @@
17591741
src = src.substring(token.raw.length);
17601742
inLink = token.inLink;
17611743
inRawBlock = token.inRawBlock;
1762-
tokens.push(token);
1744+
var _lastToken = tokens[tokens.length - 1];
1745+
1746+
if (_lastToken && token.type === 'text' && _lastToken.type === 'text') {
1747+
_lastToken.raw += token.raw;
1748+
_lastToken.text += token.text;
1749+
} else {
1750+
tokens.push(token);
1751+
}
1752+
17631753
continue;
17641754
} // link
17651755

@@ -1778,12 +1768,18 @@
17781768

17791769
if (token = this.tokenizer.reflink(src, this.tokens.links)) {
17801770
src = src.substring(token.raw.length);
1771+
var _lastToken2 = tokens[tokens.length - 1];
17811772

17821773
if (token.type === 'link') {
17831774
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
1775+
tokens.push(token);
1776+
} else if (_lastToken2 && token.type === 'text' && _lastToken2.type === 'text') {
1777+
_lastToken2.raw += token.raw;
1778+
_lastToken2.text += token.text;
1779+
} else {
1780+
tokens.push(token);
17841781
}
17851782

1786-
tokens.push(token);
17871783
continue;
17881784
} // strong
17891785

@@ -1844,7 +1840,15 @@
18441840
src = src.substring(token.raw.length);
18451841
prevChar = token.raw.slice(-1);
18461842
keepPrevChar = true;
1847-
tokens.push(token);
1843+
lastToken = tokens[tokens.length - 1];
1844+
1845+
if (lastToken && lastToken.type === 'text') {
1846+
lastToken.raw += token.raw;
1847+
lastToken.text += token.text;
1848+
} else {
1849+
tokens.push(token);
1850+
}
1851+
18481852
continue;
18491853
}
18501854

‎src/Lexer.js

+33 -14
Original file line number | Diff line number | Diff line change
@@ -136,14 +136,15 @@ module.exports = class Lexer {
136136
}
137137

138138
// code
139-
if (token = this.tokenizer.code(src, tokens)) {
139+
if (token = this.tokenizer.code(src)) {
140140
src = src.substring(token.raw.length);
141-
if (token.type) {
142-
tokens.push(token);
143-
} else {
144-
lastToken = tokens[tokens.length - 1];
141+
lastToken = tokens[tokens.length - 1];
142+
// An indented code block cannot interrupt a paragraph.
143+
if (lastToken && lastToken.type === 'paragraph') {
145144
lastToken.raw += '\n' + token.raw;
146145
lastToken.text += '\n' + token.text;
146+
} else {
147+
tokens.push(token);
147148
}
148149
continue;
149150
}
@@ -236,14 +237,14 @@ module.exports = class Lexer {
236237
}
237238

238239
// text
239-
if (token = this.tokenizer.text(src, tokens)) {
240+
if (token = this.tokenizer.text(src)) {
240241
src = src.substring(token.raw.length);
241-
if (token.type) {
242-
tokens.push(token);
243-
} else {
244-
lastToken = tokens[tokens.length - 1];
242+
lastToken = tokens[tokens.length - 1];
243+
if (lastToken && lastToken.type === 'text') {
245244
lastToken.raw += '\n' + token.raw;
246245
lastToken.text += '\n' + token.text;
246+
} else {
247+
tokens.push(token);
247248
}
248249
continue;
249250
}
@@ -331,7 +332,7 @@ module.exports = class Lexer {
331332
* Lexing/Compiling
332333
*/
333334
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
334-
let token;
335+
let token, lastToken;
335336

336337
// String with links masked to avoid interference with em and strong
337338
let maskedSrc = src;
@@ -371,7 +372,13 @@ module.exports = class Lexer {
371372
src = src.substring(token.raw.length);
372373
inLink = token.inLink;
373374
inRawBlock = token.inRawBlock;
374-
tokens.push(token);
375+
const lastToken = tokens[tokens.length - 1];
376+
if (lastToken && token.type === 'text' && lastToken.type === 'text') {
377+
lastToken.raw += token.raw;
378+
lastToken.text += token.text;
379+
} else {
380+
tokens.push(token);
381+
}
375382
continue;
376383
}
377384

@@ -388,10 +395,16 @@ module.exports = class Lexer {
388395
// reflink, nolink
389396
if (token = this.tokenizer.reflink(src, this.tokens.links)) {
390397
src = src.substring(token.raw.length);
398+
const lastToken = tokens[tokens.length - 1];
391399
if (token.type === 'link') {
392400
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
401+
tokens.push(token);
402+
} else if (lastToken && token.type === 'text' && lastToken.type === 'text') {
403+
lastToken.raw += token.raw;
404+
lastToken.text += token.text;
405+
} else {
406+
tokens.push(token);
393407
}
394-
tokens.push(token);
395408
continue;
396409
}
397410

@@ -452,7 +465,13 @@ module.exports = class Lexer {
452465
src = src.substring(token.raw.length);
453466
prevChar = token.raw.slice(-1);
454467
keepPrevChar = true;
455-
tokens.push(token);
468+
lastToken = tokens[tokens.length - 1];
469+
if (lastToken && lastToken.type === 'text') {
470+
lastToken.raw += token.raw;
471+
lastToken.text += token.text;
472+
} else {
473+
tokens.push(token);
474+
}
456475
continue;
457476
}
458477

‎src/Tokenizer.js

+2 -19
Original file line number | Diff line number | Diff line change
@@ -79,18 +79,9 @@ module.exports = class Tokenizer {
7979
}
8080
}
8181

82-
code(src, tokens) {
82+
code(src) {
8383
const cap = this.rules.block.code.exec(src);
8484
if (cap) {
85-
const lastToken = tokens[tokens.length - 1];
86-
// An indented code block cannot interrupt a paragraph.
87-
if (lastToken && lastToken.type === 'paragraph') {
88-
return {
89-
raw: cap[0],
90-
text: cap[0].trimRight()
91-
};
92-
}
93-
9485
const text = cap[0].replace(/^ {1,4}/gm, '');
9586
return {
9687
type: 'code',
@@ -410,17 +401,9 @@ module.exports = class Tokenizer {
410401
}
411402
}
412403

413-
text(src, tokens) {
404+
text(src) {
414405
const cap = this.rules.block.text.exec(src);
415406
if (cap) {
416-
const lastToken = tokens[tokens.length - 1];
417-
if (lastToken && lastToken.type === 'text') {
418-
return {
419-
raw: cap[0],
420-
text: cap[0]
421-
};
422-
}
423-
424407
return {
425408
type: 'text',
426409
raw: cap[0],

‎test/unit/Lexer-spec.js

+2 -5
Original file line number | Diff line number | Diff line change
@@ -616,9 +616,7 @@ a | b
616616
md: '<div>html</div>',
617617
options: { sanitize: true },
618618
tokens: [
619-
{ type: 'text', raw: '<div>', inLink: false, inRawBlock: false, text: '&lt;div&gt;' },
620-
{ type: 'text', raw: 'html', text: 'html' },
621-
{ type: 'text', raw: '</div>', inLink: false, inRawBlock: false, text: '&lt;/div&gt;' }
619+
{ type: 'text', raw: '<div>html</div>', inLink: false, inRawBlock: false, text: '&lt;div&gt;html&lt;/div&gt;' }
622620
]
623621
});
624622
});
@@ -740,8 +738,7 @@ a | b
740738
expectInlineTokens({
741739
md: '[link]',
742740
tokens: [
743-
{ type: 'text', raw: '[', text: '[' },
744-
{ type: 'text', raw: 'link]', text: 'link]' }
741+
{ type: 'text', raw: '[link]', text: '[link]' }
745742
]
746743
});
747744
});

1 commit comment

Comments
 (1)
Please sign in to comment.