Skip to content

Commit

Permalink
Add linkifier rule to inline chain for full links
Browse files Browse the repository at this point in the history
prevents emphasis from appearing in `http://example.org/foo._bar_.baz`
  • Loading branch information
rlidwka committed Apr 18, 2022
1 parent 75037c6 commit 6b58ec4
Show file tree
Hide file tree
Showing 9 changed files with 201 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Smartquotes, typographic replacements and plain text links can now be escaped
with backslash (e.g. `\(c)` or `google\.com` are no longer replaced).
- Fixed collision of emphasis and linkifier (so `http://example.org/foo._bar_-_baz`
is now a single link, not emphasized). Emails and fuzzy links are not affected by this.


## [12.3.2] - 2022-01-08
Expand Down
1 change: 1 addition & 0 deletions lib/parser_inline.js
Expand Up @@ -14,6 +14,7 @@ var Ruler = require('./ruler');

var _rules = [
[ 'text', require('./rules_inline/text') ],
[ 'linkify', require('./rules_inline/linkify') ],
[ 'newline', require('./rules_inline/newline') ],
[ 'escape', require('./rules_inline/escape') ],
[ 'backticks', require('./rules_inline/backticks') ],
Expand Down
11 changes: 10 additions & 1 deletion lib/rules_core/linkify.js
Expand Up @@ -69,8 +69,17 @@ module.exports = function linkify(state) {
level = currentToken.level;
lastPos = 0;

for (ln = 0; ln < links.length; ln++) {
// forbid escape sequence at the start of the string,
// this avoids http\://example.com/ from being linkified as
// http:<a href="//example.com/">//example.com/</a>
if (links.length > 0 &&
links[0].index === 0 &&
i > 0 &&
tokens[i - 1].type === 'text_special') {
links = links.slice(1);
}

for (ln = 0; ln < links.length; ln++) {
url = links[ln].url;
fullUrl = state.md.normalizeLink(url);
if (!state.md.validateLink(fullUrl)) { continue; }
Expand Down
11 changes: 11 additions & 0 deletions lib/rules_inline/html_inline.js
Expand Up @@ -6,6 +6,14 @@
var HTML_TAG_RE = require('../common/html_re').HTML_TAG_RE;


// Tells whether a piece of inline HTML begins with an opening anchor tag:
// the characters "<a" followed immediately by ">" or a whitespace character,
// compared case-insensitively. Only the start of the string is inspected.
function isLinkOpen(str) {
  var openingAnchor = /^<a[>\s]/i;

  return openingAnchor.test(str);
}
// Tells whether a piece of inline HTML begins with a closing anchor tag:
// "</a", optional whitespace, then ">", compared case-insensitively.
// Only the start of the string is inspected.
function isLinkClose(str) {
  var closingAnchor = /^<\/a\s*>/i;

  return closingAnchor.test(str);
}


function isLetter(ch) {
/*eslint no-bitwise:0*/
var lc = ch | 0x20; // to lower case
Expand Down Expand Up @@ -41,6 +49,9 @@ module.exports = function html_inline(state, silent) {
if (!silent) {
token = state.push('html_inline', '', 0);
token.content = state.src.slice(pos, pos + match[0].length);

if (isLinkOpen(token.content)) state.linkLevel++;
if (isLinkClose(token.content)) state.linkLevel--;
}
state.pos += match[0].length;
return true;
Expand Down
2 changes: 2 additions & 0 deletions lib/rules_inline/link.js
Expand Up @@ -137,7 +137,9 @@ module.exports = function link(state, silent) {
attrs.push([ 'title', title ]);
}

state.linkLevel++;
state.md.inline.tokenize(state);
state.linkLevel--;

token = state.push('link_close', 'a', -1);
}
Expand Down
58 changes: 58 additions & 0 deletions lib/rules_inline/linkify.js
@@ -0,0 +1,58 @@
// Process links like https://example.org/

'use strict';


// RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
var SCHEME_RE = /(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$/i;


module.exports = function linkify(state, silent) {
var pos, max, match, proto, link, url, fullUrl, token;

if (!state.md.options.linkify) return false;
if (state.linkLevel > 0) return false;

pos = state.pos;
max = state.posMax;

if (pos + 3 > max) return false;
if (state.src.charCodeAt(pos) !== 0x3A/* : */) return false;
if (state.src.charCodeAt(pos + 1) !== 0x2F/* / */) return false;
if (state.src.charCodeAt(pos + 2) !== 0x2F/* / */) return false;

match = state.pending.match(SCHEME_RE);
if (!match) return false;

proto = match[1];

link = state.md.linkify.matchAtStart(state.src.slice(pos - proto.length));
if (!link) return false;

url = link.url;

// disallow '*' at the end of the link (conflicts with emphasis)
url = url.replace(/\*+$/, '');

fullUrl = state.md.normalizeLink(url);
if (!state.md.validateLink(fullUrl)) return false;

if (!silent) {
state.pending = state.pending.slice(0, -proto.length);

token = state.push('link_open', 'a', 1);
token.attrs = [ [ 'href', fullUrl ] ];
token.markup = 'linkify';
token.info = 'auto';

token = state.push('text', '', 0);
token.content = state.md.normalizeLinkText(url);

token = state.push('link_close', 'a', -1);
token.markup = 'linkify';
token.info = 'auto';
}

state.pos += url.length - proto.length;
return true;
};
4 changes: 4 additions & 0 deletions lib/rules_inline/state_inline.js
Expand Up @@ -35,6 +35,10 @@ function StateInline(src, md, env, outTokens) {
// backtick length => last seen position
this.backticks = {};
this.backticksScanned = false;

// Counter used to disable inline linkify-it execution
// inside <a> and markdown links
this.linkLevel = 0;
}


Expand Down
2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -39,7 +39,7 @@
"dependencies": {
"argparse": "^2.0.1",
"entities": "~3.0.1",
"linkify-it": "^3.0.1",
"linkify-it": "markdown-it/linkify-it",
"mdurl": "^1.0.1",
"uc.micro": "^1.0.5"
},
Expand Down
112 changes: 112 additions & 0 deletions test/fixtures/markdown-it/linkify.txt
Expand Up @@ -30,6 +30,86 @@ don't touch text in html <a> tags
.


entities inside raw links
.
https://example.com/foo&amp;bar
.
<p><a href="https://example.com/foo&amp;amp;bar">https://example.com/foo&amp;amp;bar</a></p>
.


emphasis inside raw links (asterisk, can happen in links with params)
.
https://example.com/foo*bar*baz
.
<p><a href="https://example.com/foo*bar*baz">https://example.com/foo*bar*baz</a></p>
.


emphasis inside raw links (underscore)
.
http://example.org/foo._bar_-_baz
.
<p><a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
.


backticks inside raw links
.
https://example.com/foo`bar`baz
.
<p><a href="https://example.com/foo%60bar%60baz">https://example.com/foo`bar`baz</a></p>
.


links inside raw links
.
https://example.com/foo[123](456)bar
.
<p><a href="https://example.com/foo%5B123%5D(456)bar">https://example.com/foo[123](456)bar</a></p>
.


escapes not allowed at the start
.
\https://example.com
.
<p>\https://example.com</p>
.


escapes not allowed at colon
.
https\://example.com
.
<p>https://example.com</p>
.


escapes not allowed at slashes
.
https:\//aa.org https://bb.org
.
<p>https://aa.org <a href="https://bb.org">https://bb.org</a></p>
.


fuzzy link shouldn't match cc.org
.
https:/\/cc.org
.
<p>https://cc.org</p>
.


bold links (exclude markup of pairs from link tail)
.
**http://example.com/foobar**
.
<p><strong><a href="http://example.com/foobar">http://example.com/foobar</a></strong></p>
.


match links without protocol
.
www.example.org
Expand All @@ -55,3 +135,35 @@ http://example.com/(c)
.
<p><a href="http://example.com/(c)">http://example.com/(c)</a></p>
.


coverage, prefix not valid
.
http:/example.com/
.
<p>http:/example.com/</p>
.


coverage, negative link level
.
</a>[https://example.com](https://example.com)
.
<p></a><a href="https://example.com"><a href="https://example.com">https://example.com</a></a></p>
.


emphasis with '*', real link:
.
http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B
.
<p><a href="http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B">http://cdecl.ridiculousfish.com/?q=int+(*f)+(float+*)%3B</a></p>
.


emphasis with '_', real link:
.
https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf
.
<p><a href="https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf">https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf</a></p>
.

0 comments on commit 6b58ec4

Please sign in to comment.