Skip to content

Commit

Permalink
Scheme: Fixed number pattern (#2648)
Browse files Browse the repository at this point in the history
  • Loading branch information
RunDevelopment committed Nov 28, 2020
1 parent 05afbb1 commit e01ecd0
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 85 deletions.
191 changes: 117 additions & 74 deletions components/prism-scheme.js
@@ -1,78 +1,121 @@
Prism.languages.scheme = {
// this supports "normal" single-line comments:
// ; comment
// and (potentially nested) multiline comments:
// #| comment #| nested |# still comment |#
// (only 1 level of nesting is supported)
'comment': /;.*|#;\s*\((?:[^()]|\([^()]*\))*\)|#\|(?:[^#|]|#(?!\|)|\|(?!#)|#\|(?:[^#|]|#(?!\|)|\|(?!#))*\|#)*\|#/,
'string': {
pattern: /"(?:[^"\\]|\\.)*"/,
greedy: true
},
'symbol': {
pattern: /'[^()#'\s]+/,
greedy: true
},
'character': {
pattern: /#\\(?:[ux][a-fA-F\d]+\b|[-a-zA-Z]+\b|\S)/,
greedy: true,
alias: 'string'
},
'lambda-parameter': [
// https://www.cs.cmu.edu/Groups/AI/html/r4rs/r4rs_6.html#SEC30
{
pattern: /((?:^|[^'`#])\(lambda\s+)(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)/,
(function (Prism) {
Prism.languages.scheme = {
// this supports "normal" single-line comments:
// ; comment
// and (potentially nested) multiline comments:
// #| comment #| nested |# still comment |#
// (only 1 level of nesting is supported)
'comment': /;.*|#;\s*\((?:[^()]|\([^()]*\))*\)|#\|(?:[^#|]|#(?!\|)|\|(?!#)|#\|(?:[^#|]|#(?!\|)|\|(?!#))*\|#)*\|#/,
'string': {
pattern: /"(?:[^"\\]|\\.)*"/,
greedy: true
},
'symbol': {
pattern: /'[^()#'\s]+/,
greedy: true
},
'character': {
pattern: /#\\(?:[ux][a-fA-F\d]+\b|[-a-zA-Z]+\b|\S)/,
greedy: true,
alias: 'string'
},
'lambda-parameter': [
// https://www.cs.cmu.edu/Groups/AI/html/r4rs/r4rs_6.html#SEC30
{
pattern: /((?:^|[^'`#])\(lambda\s+)(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)/,
lookbehind: true
},
{
pattern: /((?:^|[^'`#])\(lambda\s+\()[^()']+/,
lookbehind: true
}
],
'keyword': {
pattern: /((?:^|[^'`#])\()(?:begin|case(?:-lambda)?|cond(?:-expand)?|define(?:-library|-macro|-record-type|-syntax|-values)?|defmacro|delay(?:-force)?|do|else|export|except|guard|if|import|include(?:-ci|-library-declarations)?|lambda|let(?:rec)?(?:-syntax|-values|\*)?|let\*-values|only|parameterize|prefix|(?:quasi-?)?quote|rename|set!|syntax-(?:case|rules)|unless|unquote(?:-splicing)?|when)(?=[()\s]|$)/,
lookbehind: true
},
'builtin': {
// all functions of the base library of R7RS plus some of the built-ins of R5RS
pattern: /((?:^|[^'`#])\()(?:abs|and|append|apply|assoc|ass[qv]|binary-port\?|boolean=?\?|bytevector(?:-append|-copy|-copy!|-length|-u8-ref|-u8-set!|\?)?|caar|cadr|call-with-(?:current-continuation|port|values)|call\/cc|car|cdar|cddr|cdr|ceiling|char(?:->integer|-ready\?|\?|<\?|<=\?|=\?|>\?|>=\?)|close-(?:input-port|output-port|port)|complex\?|cons|current-(?:error|input|output)-port|denominator|dynamic-wind|eof-object\??|eq\?|equal\?|eqv\?|error|error-object(?:-irritants|-message|\?)|eval|even\?|exact(?:-integer-sqrt|-integer\?|\?)?|expt|features|file-error\?|floor(?:-quotient|-remainder|\/)?|flush-output-port|for-each|gcd|get-output-(?:bytevector|string)|inexact\??|input-port(?:-open\?|\?)|integer(?:->char|\?)|lcm|length|list(?:->string|->vector|-copy|-ref|-set!|-tail|\?)?|make-(?:bytevector|list|parameter|string|vector)|map|max|member|memq|memv|min|modulo|negative\?|newline|not|null\?|number(?:->string|\?)|numerator|odd\?|open-(?:input|output)-(?:bytevector|string)|or|output-port(?:-open\?|\?)|pair\?|peek-char|peek-u8|port\?|positive\?|procedure\?|quotient|raise|raise-continuable|rational\?|rationalize|read-(?:bytevector|bytevector!|char|error\?|line|string|u8)|real\?|remainder|reverse|round|set-c[ad]r!|square|string(?:->list|->number|->symbol|->utf8|->vector|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?|<\?|<=\?|=\?|>\?|>=\?)?|substring|symbol(?:->string|\?|=\?)|syntax-error|textual-port\?|truncate(?:-quotient|-remainder|\/)?|u8-ready\?|utf8->string|values|vector(?:->list|->string|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?)?|with-exception-handler|write-(?:bytevector|char|string|u8)|zero\?)(?=[()\s]|$)/,
lookbehind: true
},
'operator': {
pattern: /((?:^|[^'`#])\()(?:[-+*%/]|[<>]=?|=>?)(?=[()\s]|$)/,
lookbehind: true
},
{
pattern: /((?:^|[^'`#])\(lambda\s+\()[^()']+/,
'number': {
// The number pattern from [the R7RS spec](https://small.r7rs.org/attachment/r7rs.pdf).
//
// <number> := <num 2>|<num 8>|<num 10>|<num 16>
// <num R> := <prefix R><complex R>
// <complex R> := <real R>(?:@<real R>|<imaginary R>)?|<imaginary R>
// <imaginary R> := [+-](?:<ureal R>|(?:inf|nan)\.0)?i
// <real R> := [+-]?<ureal R>|[+-](?:inf|nan)\.0
// <ureal R> := <uint R>(?:\/<uint R>)?
// | <decimal R>
//
// <decimal 10> := (?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?
// <uint R> := <digit R>+
// <prefix R> := <radix R>(?:#[ei])?|(?:#[ei])?<radix R>
// <radix 2> := #b
// <radix 8> := #o
// <radix 10> := (?:#d)?
// <radix 16> := #x
// <digit 2> := [01]
// <digit 8> := [0-7]
// <digit 10> := \d
// <digit 16> := [0-9a-f]
//
// The problem with this grammar is that the resulting regex is way to complex, so we simplify by grouping all
// non-decimal bases together. This results in a decimal (dec) and combined binary, octal, and hexadecimal (box)
// pattern:
pattern: RegExp(SortedBNF({
'<ureal dec>': /\d+(?:\/\d+)?|(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?/.source,
'<real dec>': /[+-]?<ureal dec>|[+-](?:inf|nan)\.0/.source,
'<imaginary dec>': /[+-](?:<ureal dec>|(?:inf|nan)\.0)?i/.source,
'<complex dec>': /<real dec>(?:@<real dec>|<imaginary dec>)?|<imaginary dec>/.source,
'<num dec>': /(?:#d(?:#[ei])?|#[ei](?:#d)?)?<complex dec>/.source,

'<ureal box>': /[0-9a-f]+(?:\/[0-9a-f]+)?/.source,
'<real box>': /[+-]?<ureal box>|[+-](?:inf|nan)\.0/.source,
'<imaginary box>': /[+-](?:<ureal box>|(?:inf|nan)\.0)?i/.source,
'<complex box>': /<real box>(?:@<real box>|<imaginary box>)?|<imaginary box>/.source,
'<num box>': /#[box](?:#[ei])?|(?:#[ei])?#[box]<complex box>/.source,

'<number>': /(^|[\s()])(?:<num dec>|<num box>)(?=[()\s]|$)/.source,
}), 'i'),
lookbehind: true
},
'boolean': {
pattern: /(^|[\s()])#(?:[ft]|false|true)(?=[()\s]|$)/,
lookbehind: true
},
'function': {
pattern: /((?:^|[^'`#])\()(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)(?=[()\s]|$)/,
lookbehind: true
},
'identifier': {
pattern: /(^|[\s()])\|(?:[^\\|]|\\.)*\|(?=[()\s]|$)/,
lookbehind: true,
greedy: true
},
'punctuation': /[()']/
};

/**
* Given a topologically sorted BNF grammar, this will return the RegExp source of the last rule of the grammar.
*
* @param {Record<string, string>} grammar
* @returns {string}
*/
function SortedBNF(grammar) {
for (var key in grammar) {
grammar[key] = grammar[key].replace(/<[\w\s]+>/g, function (key) {
return '(?:' + grammar[key].trim() + ')';
});
}
],
'keyword': {
pattern: /((?:^|[^'`#])\()(?:begin|case(?:-lambda)?|cond(?:-expand)?|define(?:-library|-macro|-record-type|-syntax|-values)?|defmacro|delay(?:-force)?|do|else|export|except|guard|if|import|include(?:-ci|-library-declarations)?|lambda|let(?:rec)?(?:-syntax|-values|\*)?|let\*-values|only|parameterize|prefix|(?:quasi-?)?quote|rename|set!|syntax-(?:case|rules)|unless|unquote(?:-splicing)?|when)(?=[()\s]|$)/,
lookbehind: true
},
'builtin': {
// all functions of the base library of R7RS plus some of the built-ins of R5RS
pattern: /((?:^|[^'`#])\()(?:abs|and|append|apply|assoc|ass[qv]|binary-port\?|boolean=?\?|bytevector(?:-append|-copy|-copy!|-length|-u8-ref|-u8-set!|\?)?|caar|cadr|call-with-(?:current-continuation|port|values)|call\/cc|car|cdar|cddr|cdr|ceiling|char(?:->integer|-ready\?|\?|<\?|<=\?|=\?|>\?|>=\?)|close-(?:input-port|output-port|port)|complex\?|cons|current-(?:error|input|output)-port|denominator|dynamic-wind|eof-object\??|eq\?|equal\?|eqv\?|error|error-object(?:-irritants|-message|\?)|eval|even\?|exact(?:-integer-sqrt|-integer\?|\?)?|expt|features|file-error\?|floor(?:-quotient|-remainder|\/)?|flush-output-port|for-each|gcd|get-output-(?:bytevector|string)|inexact\??|input-port(?:-open\?|\?)|integer(?:->char|\?)|lcm|length|list(?:->string|->vector|-copy|-ref|-set!|-tail|\?)?|make-(?:bytevector|list|parameter|string|vector)|map|max|member|memq|memv|min|modulo|negative\?|newline|not|null\?|number(?:->string|\?)|numerator|odd\?|open-(?:input|output)-(?:bytevector|string)|or|output-port(?:-open\?|\?)|pair\?|peek-char|peek-u8|port\?|positive\?|procedure\?|quotient|raise|raise-continuable|rational\?|rationalize|read-(?:bytevector|bytevector!|char|error\?|line|string|u8)|real\?|remainder|reverse|round|set-c[ad]r!|square|string(?:->list|->number|->symbol|->utf8|->vector|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?|<\?|<=\?|=\?|>\?|>=\?)?|substring|symbol(?:->string|\?|=\?)|syntax-error|textual-port\?|truncate(?:-quotient|-remainder|\/)?|u8-ready\?|utf8->string|values|vector(?:->list|->string|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?)?|with-exception-handler|write-(?:bytevector|char|string|u8)|zero\?)(?=[()\s]|$)/,
lookbehind: true
},
'operator': {
pattern: /((?:^|[^'`#])\()(?:[-+*%/]|[<>]=?|=>?)(?=[()\s]|$)/,
lookbehind: true
},
'number': {
// This pattern (apart from the lookarounds) works like this:
//
// Decimal numbers
// <dec real> := \d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+
// <dec complex> := <dec real>(?:[+-]<dec real>i)?|<dec real>i
// <dec prefix> := (?:#d(?:#[ei])?|#[ei](?:#d)?)?
// <dec number> := <dec prefix>[+-]?<complex>
//
// Binary, octal, and hexadecimal numbers
// <b.o.x. real> := [\da-fA-F]+(?:\/[\da-fA-F]+)?
// <b.o.x. complex> := <b.o.x. real>(?:[+-]<b.o.x. real>i)?|<b.o.x. real>i
// <b.o.x. prefix> := #[box](?:#[ei])?|#[ei](?:#[box])?
// <b.o.x. number> := <b.o.x. prefix>[+-]?<b.o.x. complex>
//
// <number> := <dec number>|<b.o.x. number>
pattern: /(^|[\s()])(?:(?:#d(?:#[ei])?|#[ei](?:#d)?)?[+-]?(?:(?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)(?:[+-](?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)i)?|(?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)i)|(?:#[box](?:#[ei])?|#[ei](?:#[box])?)[+-]?(?:[\da-fA-F]+(?:\/[\da-fA-F]+)?(?:[+-][\da-fA-F]+(?:\/[\da-fA-F]+)?i)?|[\da-fA-F]+(?:\/[\da-fA-F]+)?i))(?=[()\s]|$)/,
lookbehind: true
},
'boolean': {
pattern: /(^|[\s()])#(?:[ft]|false|true)(?=[()\s]|$)/,
lookbehind: true
},
'function': {
pattern: /((?:^|[^'`#])\()(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)(?=[()\s]|$)/,
lookbehind: true
},
'identifier': {
pattern: /(^|[\s()])\|(?:[^\\|]|\\.)*\|(?=[()\s]|$)/,
lookbehind: true,
greedy: true
},
'punctuation': /[()']/
};
// return the last item
return grammar[key];
}

})(Prism);
2 changes: 1 addition & 1 deletion components/prism-scheme.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 18 additions & 5 deletions tests/languages/racket/number_feature.test
@@ -1,15 +1,21 @@
123

(foo 42 +42 -42)
(foo 1e3 +1e3 -1e3)
(foo 1e+3 1e-3 3.14159 3.14159e-1)
(foo 8/3)
(foo 3+4i 2.5+0.0i 2.5+0.0i -2.5e4+0.0e4i 3+0i -2e-5i)
(list 10i +10i -10i 10.10i 10+10i 10.10+10.10i 10-10i 10e+10i 10+10e+10i)
(list +10i -10i 10+10i 10.10+10.10i 10-10i 10+10e+10i)

(list #d123 #e#d123e-4 #d#i12 #i-1.234i)

(list #xBAD #b1110011 #o777)
(list #i#x10 #i#x10+10i #b10+10i)

10+i
10+.1i
10+1.i

; not a number but a symbol
(define 1+2 10)

Expand All @@ -19,6 +25,8 @@
----------------------------------------------------

[
["number", "123"],

["punctuation", "("],
["function", "foo"],
["number", "42"],
Expand Down Expand Up @@ -58,14 +66,11 @@

["punctuation", "("],
["builtin", "list"],
["number", "10i"],
["number", "+10i"],
["number", "-10i"],
["number", "10.10i"],
["number", "10+10i"],
["number", "10.10+10.10i"],
["number", "10-10i"],
["number", "10e+10i"],
["number", "10+10e+10i"],
["punctuation", ")"],

Expand All @@ -91,8 +96,16 @@
["number", "#b10+10i"],
["punctuation", ")"],

["number", "10+i"],
["number", "10+.1i"],
["number", "10+1.i"],

["comment", "; not a number but a symbol"],
["punctuation", "("], ["keyword", "define"], " 1+2 ", ["number", "10"], ["punctuation", ")"],
["punctuation", "("],
["keyword", "define"],
" 1+2 ",
["number", "10"],
["punctuation", ")"],

["punctuation", "["],
["function", "foo"],
Expand Down

0 comments on commit e01ecd0

Please sign in to comment.