Skip to content

Commit

Permalink
dev: Improve grammar matching. (#1902)
Browse files Browse the repository at this point in the history
* dev: Improve grammar matching.

* fix testing issue.
  • Loading branch information
Jason3S committed Oct 19, 2021
1 parent b83ee6e commit 2b7e136
Show file tree
Hide file tree
Showing 20 changed files with 1,658 additions and 26 deletions.
1 change: 1 addition & 0 deletions cspell-dict.txt
Expand Up @@ -14,6 +14,7 @@ dependabot
deserialize
deserializer
deserializers
DocBlock
exonum
gimu
globstar
Expand Down
164 changes: 164 additions & 0 deletions packages/cspell-grammar/fixtures/tokenizer-tests/test.md
Expand Up @@ -20,4 +20,168 @@
| `")"` | |
| `")"` | |

- `0`: <code> const greeting = &quot;hello&quot;;↩</code>

| text | scope |
| ------------------------------- | --------------------------------- |
| <code> const greeting = </code> | source.ts |
| <code>&quot;</code> | string.quoted.double.ts source.ts |
| <code>hello</code> | string.quoted.double.ts source.ts |
| <code>&quot;</code> | string.quoted.double.ts source.ts |
| <code>;↩</code> | source.ts |

# Sample TypeScript file

- `0`: <code>import { tokenizedLinesToMarkdown } from &#39;./visualizeAsMD&#39;;↩</code>

| text | scope |
| ------------------------------------------------------ | --------------------------------- |
| <code>import { tokenizedLinesToMarkdown } from </code> | source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>./visualizeAsMD</code> | string.quoted.single.ts source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>;↩</code> | source.ts |

- `1`: <code>import { TypeScript } from &#39;../grammars&#39;;↩</code>

| text | scope |
| ---------------------------------------- | --------------------------------- |
| <code>import { TypeScript } from </code> | source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>../grammars</code> | string.quoted.single.ts source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>;↩</code> | source.ts |

- `2`: <code>import { normalizeGrammar } from &#39;../parser/grammarNormalizer&#39;;↩</code>

| text | scope |
| ---------------------------------------------- | --------------------------------- |
| <code>import { normalizeGrammar } from </code> | source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>../parser/grammarNormalizer</code> | string.quoted.single.ts source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>;↩</code> | source.ts |

- `3`: <code>import { tokenizeText } from &#39;../dist&#39;;↩</code>

| text | scope |
| ------------------------------------------ | --------------------------------- |
| <code>import { tokenizeText } from </code> | source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>../dist</code> | string.quoted.single.ts source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>;↩</code> | source.ts |

- `4`: <code>↩</code>

| text | scope |
| -------------- | --------- |
| <code>↩</code> | source.ts |

- `5`: <code>describe&#40;&#39;visualizeAsMD&#39;, &#40;&#41; =&gt; {↩</code>

| text | scope |
| ---------------------------------- | --------------------------------- |
| <code>describe&#40;</code> | source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>visualizeAsMD</code> | string.quoted.single.ts source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>, &#40;&#41; =&gt; {↩</code> | source.ts |

- `6`: <code> const gTypeScript = normalizeGrammar&#40;TypeScript.grammar&#41;;↩</code>

| text | scope |
| -------------------------------------------------------------------------------- | --------- |
| <code> const gTypeScript = normalizeGrammar&#40;TypeScript.grammar&#41;;↩</code> | source.ts |

- `7`: <code>↩</code>

| text | scope |
| -------------- | --------- |
| <code>↩</code> | source.ts |

- `8`: <code> test.each&#96;↩</code>

| text | scope |
| ----------------------- | ---------------------------- |
| <code> test.each</code> | source.ts |
| <code>&#96;</code> | string.template.ts source.ts |
| <code>↩</code> | string.template.ts source.ts |

- `9`: <code> lines↩</code>

| text | scope |
| -------------------- | ---------------------------- |
| <code> lines↩</code> | string.template.ts source.ts |

- `10`: <code> \${tokenize&#40;&#39;&#39;&#41;}↩</code>

| text | scope |
| ----------------------------------------------- | ---------------------------- |
| <code> \${tokenize&#40;&#39;&#39;&#41;}↩</code> | string.template.ts source.ts |

- `11`: <code> \${tokenize&#40;&#39;\\tconst greeting = &quot;hello&quot;;&#92;n&#39;&#41;}↩</code>

| text | scope |
| ------------------------------------------------ | --------------------------------------------------------- |
| <code> \${tokenize&#40;&#39;</code> | string.template.ts source.ts |
| <code>&#92;t</code> | constant.character.escape.ts string.template.ts source.ts |
| <code>const greeting = &quot;hello&quot;;</code> | string.template.ts source.ts |
| <code>&#92;n</code> | constant.character.escape.ts string.template.ts source.ts |
| <code>&#39;&#41;}↩</code> | string.template.ts source.ts |

- `12`: <code> &#96;&#40;&#39;tokenizedLinesToMarkdown&#39;, &#40;{ lines }&#41; =&gt; {↩</code>

| text | scope |
| ------------------------------------------- | --------------------------------- |
| <code> </code> | string.template.ts source.ts |
| <code>&#96;</code> | string.template.ts source.ts |
| <code>&#40;</code> | source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>tokenizedLinesToMarkdown</code> | string.quoted.single.ts source.ts |
| <code>&#39;</code> | string.quoted.single.ts source.ts |
| <code>, &#40;{ lines }&#41; =&gt; {↩</code> | source.ts |

- `13`: <code> expect&#40;tokenizedLinesToMarkdown&#40;lines&#41;&#41;.toMatchSnapshot&#40;&#41;;↩</code>

| text | scope |
| ------------------------------------------------------------------------------------------------- | --------- |
| <code> expect&#40;tokenizedLinesToMarkdown&#40;lines&#41;&#41;.toMatchSnapshot&#40;&#41;;↩</code> | source.ts |

- `14`: <code> }&#41;;↩</code>

| text | scope |
| ---------------------- | --------- |
| <code> }&#41;;↩</code> | source.ts |

- `15`: <code>↩</code>

| text | scope |
| -------------- | --------- |
| <code>↩</code> | source.ts |

- `16`: <code> function tokenize&#40;text: string&#41; {↩</code>

| text | scope |
| -------------------------------------------------------- | --------- |
| <code> function tokenize&#40;text: string&#41; {↩</code> | source.ts |

- `17`: <code> return tokenizeText&#40;text, gTypeScript&#41;;↩</code>

| text | scope |
| -------------------------------------------------------------- | --------- |
| <code> return tokenizeText&#40;text, gTypeScript&#41;;↩</code> | source.ts |

- `18`: <code> }↩</code>

| text | scope |
| ---------------- | --------- |
| <code> }↩</code> | source.ts |

- `19`: <code>}&#41;;↩</code>

| text | scope |
| --------------------- | --------- |
| <code>}&#41;;↩</code> | source.ts |

<!--- cspell:ignore paren -->
27 changes: 27 additions & 0 deletions packages/cspell-grammar/samples/sampleJest.ts
@@ -0,0 +1,27 @@
import { tokenizedLinesToMarkdown } from './visualizeAsMD';
import { TypeScript } from '../grammars';
import { normalizeGrammar } from '../parser/grammarNormalizer';
import { tokenizeText } from '../dist';

// Template literal whose `${...}` expression spans several lines and contains a
// string, a method call, a line comment and an object literal.
// NOTE(review): `sampleText` is not referenced anywhere else in this file; it is
// presumably here only as tokenizer sample input - confirm before removing it.
const sampleText = `
${
'.'.repeat(22) + // Comment
{ name: 'First' }.name
}
`;
// Snapshot test: renders tokenized text with tokenizedLinesToMarkdown and compares
// the result against the stored snapshot.
describe('visualizeAsMD', () => {
// Normalized once and shared by every `tokenize` call below.
const gTypeScript = normalizeGrammar(TypeScript.grammar);

// Table-driven cases with a single `lines` column. Each row is produced by calling
// `tokenize` while the table is built, which works because `tokenize` is a hoisted
// function declaration and `gTypeScript` is already initialized at this point.
test.each`
lines
${tokenize('')}
${tokenize('\tconst greeting = "hello";\n')}
`('tokenizedLinesToMarkdown', ({ lines }) => {
expect(tokenizedLinesToMarkdown(lines)).toMatchSnapshot();
});

// Tokenizes `text` using the normalized TypeScript grammar defined above.
function tokenize(text: string) {
return tokenizeText(text, gTypeScript);
}
});
2 changes: 2 additions & 0 deletions packages/cspell-grammar/src/grammars/index.ts
@@ -0,0 +1,2 @@
export * as TypeScript from './typescript';
export * as Markdown from './markdown';
78 changes: 63 additions & 15 deletions packages/cspell-grammar/src/grammars/typescript.ts
Expand Up @@ -2,7 +2,8 @@ import { Grammar, Repository } from '..';

const repository: Repository = {
statements: {
patterns: [{ include: '#string' }, { include: '#comment' }],
name: 'code.ts',
patterns: ['#string', '#comment', '#braces'],
},
string: {
patterns: [{ include: '#string_q_single' }, { include: '#string_q_double' }, { include: '#string_template' }],
Expand All @@ -11,38 +12,85 @@ const repository: Repository = {
name: 'string.quoted.single.ts',
begin: "'",
end: /'|((?:[^\\\n])$)/,
captures: 'punctuation.string.ts',
patterns: [{ include: '#string_character_escape' }],
},
string_q_double: {
name: 'string.quoted.double.ts',
begin: '"',
end: /"|((?:[^\\\n])$)/,
captures: 'punctuation.string.ts',
patterns: [{ include: '#string_character_escape' }],
},
string_template: {
name: 'string.template.ts',
begin: '`',
end: '`',
patterns: [{ include: '#string_character_escape' }],
},
string_wrap: {
match: /(?:[^\\\n])$/,
captures: 'punctuation.string.ts',
patterns: [
{
name: 'meta.template.expression.ts',
contentName: 'meta.embedded.line.ts',
begin: '${',
end: '}',
patterns: ['#statements'],
captures: 'punctuation.definition.template.expression.ts',
},
{ include: '#string_character_escape' },
],
},
string_character_escape: {
name: 'constant.character.escape.ts',
match: /\\(x[0-9A-Fa-f]{2}|[0-3][0-7]{0,2}|[4-7][0-7]?|.|$)/,
},
comment: {
patterns: [{ include: '#comment_line' }, { include: '#comment_block' }],
// Bracketed regions: one begin/end pattern per bracket pair - (), {} and [].
// Each pair includes '#statements' again, so whatever '#statements' matches
// (including further brackets) is also matched inside a bracketed region.
braces: {
patterns: [
{
// Parentheses.
begin: '(',
end: ')',
captures: 'punctuation.meta.brace.ts',
patterns: ['#statements'],
contentName: 'meta.brace.ts',
},
{
// Curly braces.
begin: '{',
end: '}',
captures: 'punctuation.meta.brace.ts',
patterns: ['#statements'],
contentName: 'meta.brace.ts',
},
{
// Square brackets.
begin: '[',
end: ']',
captures: 'punctuation.meta.brace.ts',
patterns: ['#statements'],
contentName: 'meta.brace.ts',
},
],
},
comment_line: {
name: 'comment.line.ts',
match: /\/\/.*/,
},
comment_block: {
name: 'comment.block.ts',
begin: '/*',
end: '*/',
// Comment patterns: line comments, documentation blocks and plain block comments.
comment: {
    patterns: [
        {
            name: 'comment.line.ts',
            comment: 'line comment',
            begin: '//',
            // Zero-width lookahead: the comment ends at `$` without consuming any text.
            end: /(?=$)/,
            captures: 'punctuation.definition.comment.ts',
        },
        {
            name: 'comment.block.documentation.ts',
            comment: 'DocBlock',
            // The leading slash is required: a bare `**` would also match the
            // exponent operator (`a ** b`) and would never line up with the
            // start of `/**`. The negative lookahead excludes the empty block
            // comment `/**/`, which is left to the plain block pattern below.
            begin: /\/\*\*(?!\/)/,
            captures: 'punctuation.definition.comment.ts',
            end: '*/',
        },
        {
            name: 'comment.block.ts',
            begin: '/*',
            end: '*/',
            captures: 'punctuation.definition.comment.ts',
        },
    ],
},
};

Expand Down
3 changes: 2 additions & 1 deletion packages/cspell-grammar/src/index.ts
@@ -1 +1,2 @@
export type { Grammar, Pattern, Repository } from './parser/grammarDefinition';
export type { Grammar, Pattern, Repository } from './parser';
export { tokenizeLine, tokenizeText } from './parser';

0 comments on commit 2b7e136

Please sign in to comment.