Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// cSpell:ignore ings ning gimuy
// cSpell:words xregexp
import { xregexp as XRegExp } from 'cspell-util-bundle';
// A run of upper-case letters, an optional apostrophe, then one of the lower-case
// suffixes s / ing / ies / es / ings / ed / ning — but only when no lower-case
// letter follows the suffix.
const regExUpperSOrIng = XRegExp('(\\p{Lu}+\'?(?:s|ing|ies|es|ings|ed|ning))(?!\\p{Ll})', 'g');
// A lower-case letter immediately followed by an upper-case letter (captured separately).
const regExSplitWords = XRegExp('(\\p{Ll})(\\p{Lu})', 'g');
// An upper-case letter immediately followed by an upper-case + lower-case pair
// (captured as two groups).
const regExSplitWords2 = XRegExp('(\\p{Lu})(\\p{Lu}\\p{Ll})', 'g');
/**
 * Break a camelCase word apart into its component pieces.
 *
 * Steps:
 *  1. Every match of `regExUpperSOrIng` keeps its first character and has the
 *     rest lower-cased.
 *  2. A private separator token is inserted between the two capture groups of
 *     every `regExSplitWords` match, then of every `regExSplitWords2` match.
 *  3. The result is split on that separator token.
 *
 * @param word - the word to break apart.
 * @returns the pieces of `word`, in order.
 */
export function splitCamelCaseWord(word: string): string[] {
    const separator = '_<^*_*^>_';
    // Replacement pattern shared by both passes: group 1, the separator, group 2.
    const insertSeparator = '$1' + separator + '$2';

    const normalized = word.replace(
        regExUpperSOrIng,
        (match) => match[0] + match.slice(1).toLowerCase()
    );
    const afterLowerUpperPass = XRegExp.replace(normalized, regExSplitWords, insertSeparator);
    const afterUpperRunPass = XRegExp.replace(afterLowerUpperPass, regExSplitWords2, insertSeparator);

    return XRegExp.split(afterUpperRunPass, separator);
}
/**
 * Extract the words contained in a piece of text.
 *
 * The text is first matched against `regExWords`. Inside each match, every
 * run matched by `regExIgnoreCharacters` is overwritten with spaces (one space
 * per UTF-16 code unit, so the length of the text does not change), and the
 * result is matched against `regExWords` a second time. Entries whose text is
 * empty are dropped.
 *
 * @param text - the text to scan, together with its offset.
 * @returns a sequence of the words found, as produced by `matchToTextOffset`.
 */
export function extractWordsFromTextOffset(text: TextOffset): Sequence<TextOffset> {
    // Two independent copies of the pattern, one per scan.
    // NOTE(review): presumably so the chained scans do not share `lastIndex`
    // state on a global regex — confirm against matchToTextOffset.
    const reg = XRegExp(regExWords);
    const reg2 = XRegExp(regExWords);
    return matchToTextOffset(reg, text)
        // remove characters that match against \p{L} but are not letters (Chinese characters are an example).
        .map(({ text: matched, offset }) => ({
            text: XRegExp.replace(matched, regExIgnoreCharacters, (match: string) => ' '.repeat(match.length)),
            offset,
        }))
        .concatMap((wo) => matchToTextOffset(reg2, wo))
        .filter((wo) => !!wo.text);
}
}
/**
 * A `TextOffset` extended with document-related fields.
 * NOTE(review): the field meanings below are inferred from their names only —
 * confirm against the code that produces these objects.
 */
export interface TextDocumentOffset extends TextOffset {
/** Optional URI; presumably identifies the document. */
uri?: string;
/** Presumably the document's text. */
doc: string;
/** Presumably the line position; whether it is 0- or 1-based is not visible here. */
row: number;
/** Presumably the column position; whether it is 0- or 1-based is not visible here. */
col: number;
}
// Any run of non-newline characters together with the line terminator (\n or \r\n)
// or the end of input that follows it.
const regExLines = /.*(\r?\n|$)/g;
// const regExIdentifiers = XRegExp('(?:\\p{L}|[0-9_\'])+', 'gi');
// A run of upper-case letters, an optional backslash, an optional apostrophe
// (' or ’), then one of the lower-case suffixes s / ing / ies / es / ings / ed / ning —
// but only when no lower-case letter follows the suffix.
const regExUpperSOrIng = XRegExp('(\\p{Lu}+\\\\?[\'’]?(?:s|ing|ies|es|ings|ed|ning))(?!\\p{Ll})', 'g');
// A lower-case letter immediately followed by an upper-case letter (captured separately).
const regExSplitWords = XRegExp('(\\p{Ll})(\\p{Lu})', 'g');
// An upper-case letter immediately followed by an upper-case + lower-case pair
// (captured as two groups).
const regExSplitWords2 = XRegExp('(\\p{Lu})(\\p{Lu}\\p{Ll})', 'g');
// A word: a letter followed by one or more letters, where a letter may be preceded
// by an apostrophe (' or ’, itself optionally preceded by a backslash); or a single letter.
const regExWords = XRegExp("\\p{L}(?:\\\\?['’]\\p{L}|\\p{L})+|\\p{L}", 'g');
// Characters in the Hiragana, Han, Katakana or Hangul scripts, or in U+30A0–U+30FF.
const regExIgnoreCharacters = XRegExp('\\p{Hiragana}|\\p{Han}|\\p{Katakana}|[\\u30A0-\\u30FF]|[\\p{Hangul}]', 'g');
// Whole string is one upper-case letter followed by one or more lower-case letters.
const regExFirstUpper = XRegExp('^\\p{Lu}\\p{Ll}+$');
// Whole string consists of upper-case letters only.
const regExAllUpper = XRegExp('^\\p{Lu}+$');
// Whole string consists of lower-case letters only.
const regExAllLower = XRegExp('^\\p{Ll}+$');
// A string written as /pattern/flags; captures the pattern and the flags (any of g, i, m, u, y).
const regExMatchRegExParts = /^\/(.*)\/([gimuy]*)$/;
// Any character in the Unicode Mark category (\p{M}).
const regExAccents = XRegExp('\\p{M}', 'g');
/**
 * Split a camelCase word into its parts and give each part its own offset.
 *
 * @param wo - the word to split, together with the offset at which it starts.
 * @returns the parts produced by `splitCamelCaseWord`, each paired with an
 *          offset equal to the previous entry's offset plus the previous
 *          entry's text length.
 */
export function splitCamelCaseWordWithOffset(wo: TextOffset): Array<TextOffset> {
    return splitCamelCaseWord(wo.text)
        .map(scanMap(
            (last, text) => ({ text, offset: last.offset + last.text.length }),
            // Seed entry with empty text. NOTE(review): assumes scanMap passes this
            // seed as `last` for the first part, so that part starts at wo.offset.
            { text: '', offset: wo.offset }
        ));
}
}
// Any run of non-newline characters together with the line terminator (\n or \r\n)
// or the end of input that follows it.
const regExLines = /.*(\r?\n|$)/g;
// const regExIdentifiers = XRegExp('(?:\\p{L}|[0-9_\'])+', 'gi');
// A run of upper-case letters, an optional backslash, an optional apostrophe
// (' or ’), then one of the lower-case suffixes s / ing / ies / es / ings / ed / ning —
// but only when no lower-case letter follows the suffix.
const regExUpperSOrIng = XRegExp('(\\p{Lu}+\\\\?[\'’]?(?:s|ing|ies|es|ings|ed|ning))(?!\\p{Ll})', 'g');
// A lower-case letter immediately followed by an upper-case letter (captured separately).
const regExSplitWords = XRegExp('(\\p{Ll})(\\p{Lu})', 'g');
// An upper-case letter immediately followed by an upper-case + lower-case pair
// (captured as two groups).
const regExSplitWords2 = XRegExp('(\\p{Lu})(\\p{Lu}\\p{Ll})', 'g');
// A word: a letter followed by one or more letters, where a letter may be preceded
// by an apostrophe (' or ’, itself optionally preceded by a backslash); or a single letter.
const regExWords = XRegExp("\\p{L}(?:\\\\?['’]\\p{L}|\\p{L})+|\\p{L}", 'g');
// Characters in the Hiragana, Han, Katakana or Hangul scripts, or in U+30A0–U+30FF.
const regExIgnoreCharacters = XRegExp('\\p{Hiragana}|\\p{Han}|\\p{Katakana}|[\\u30A0-\\u30FF]|[\\p{Hangul}]', 'g');
// Whole string is one upper-case letter followed by one or more lower-case letters.
const regExFirstUpper = XRegExp('^\\p{Lu}\\p{Ll}+$');
// Whole string consists of upper-case letters only.
const regExAllUpper = XRegExp('^\\p{Lu}+$');
// Whole string consists of lower-case letters only.
const regExAllLower = XRegExp('^\\p{Ll}+$');
// A string written as /pattern/flags; captures the pattern and the flags (any of g, i, m, u, y).
const regExMatchRegExParts = /^\/(.*)\/([gimuy]*)$/;
// Any character in the Unicode Mark category (\p{M}).
const regExAccents = XRegExp('\\p{M}', 'g');
/**
 * Split a camelCase word into its parts and give each part its own offset.
 *
 * @param wo - the word to split, together with the offset at which it starts.
 * @returns the parts produced by `splitCamelCaseWord`, each paired with an
 *          offset equal to the previous entry's offset plus the previous
 *          entry's text length.
 */
export function splitCamelCaseWordWithOffset(wo: TextOffset): Array<TextOffset> {
    return splitCamelCaseWord(wo.text)
        .map(scanMap(
            (last, text) => ({ text, offset: last.offset + last.text.length }),
            // Seed entry with empty text. NOTE(review): assumes scanMap passes this
            // seed as `last` for the first part, so that part starts at wo.offset.
            { text: '', offset: wo.offset }
        ));
}
/**
* Split camelCase words into an array of strings.
*/
export function splitCamelCaseWord(word: string): string[] {
const wPrime = word.replace(regExUpperSOrIng, s => s[0] + s.substr(1).toLowerCase());
const separator = '_<^*_*^>_';
const pass1 = XRegExp.replace(wPrime, regExSplitWords, '$1' + separator + '$2');
/**
 * A piece of text paired with a numeric offset.
 * NOTE(review): the offset is presumably the position of `text` within some
 * larger text; its unit and origin are not visible here — confirm.
 */
export interface TextOffset {
/** The text itself. */
text: string;
/** Numeric offset associated with `text`. */
offset: number;
}
/**
 * A `TextOffset` extended with document-related fields.
 * NOTE(review): the field meanings below are inferred from their names only —
 * confirm against the code that produces these objects.
 */
export interface TextDocumentOffset extends TextOffset {
/** Optional URI; presumably identifies the document. */
uri?: string;
/** Presumably the document's text. */
doc: string;
/** Presumably the line position; whether it is 0- or 1-based is not visible here. */
row: number;
/** Presumably the column position; whether it is 0- or 1-based is not visible here. */
col: number;
}
// Any run of non-newline characters together with the line terminator (\n or \r\n)
// or the end of input that follows it.
const regExLines = /.*(\r?\n|$)/g;
// const regExIdentifiers = XRegExp('(?:\\p{L}|[0-9_\'])+', 'gi');
// A run of upper-case letters, an optional backslash, an optional apostrophe
// (' or ’), then one of the lower-case suffixes s / ing / ies / es / ings / ed / ning —
// but only when no lower-case letter follows the suffix.
const regExUpperSOrIng = XRegExp('(\\p{Lu}+\\\\?[\'’]?(?:s|ing|ies|es|ings|ed|ning))(?!\\p{Ll})', 'g');
// A lower-case letter immediately followed by an upper-case letter (captured separately).
const regExSplitWords = XRegExp('(\\p{Ll})(\\p{Lu})', 'g');
// An upper-case letter immediately followed by an upper-case + lower-case pair
// (captured as two groups).
const regExSplitWords2 = XRegExp('(\\p{Lu})(\\p{Lu}\\p{Ll})', 'g');
// A word: a letter followed by one or more letters, where a letter may be preceded
// by an apostrophe (' or ’, itself optionally preceded by a backslash); or a single letter.
const regExWords = XRegExp("\\p{L}(?:\\\\?['’]\\p{L}|\\p{L})+|\\p{L}", 'g');
// Characters in the Hiragana, Han, Katakana or Hangul scripts, or in U+30A0–U+30FF.
const regExIgnoreCharacters = XRegExp('\\p{Hiragana}|\\p{Han}|\\p{Katakana}|[\\u30A0-\\u30FF]|[\\p{Hangul}]', 'g');
// Whole string is one upper-case letter followed by one or more lower-case letters.
const regExFirstUpper = XRegExp('^\\p{Lu}\\p{Ll}+$');
// Whole string consists of upper-case letters only.
const regExAllUpper = XRegExp('^\\p{Lu}+$');
// Whole string consists of lower-case letters only.
const regExAllLower = XRegExp('^\\p{Ll}+$');
// A string written as /pattern/flags; captures the pattern and the flags (any of g, i, m, u, y).
const regExMatchRegExParts = /^\/(.*)\/([gimuy]*)$/;
// Any character in the Unicode Mark category (\p{M}).
const regExAccents = XRegExp('\\p{M}', 'g');
export function splitCamelCaseWordWithOffset(wo: TextOffset): Array {
return splitCamelCaseWord(wo.text)
.map(scanMap(
(last, text) => ({ text, offset: last.offset + last.text.length }),
/**
 * Extract the words contained in a piece of text.
 *
 * The text is first matched against `regExWords`. Inside each match, every
 * run matched by `regExIgnoreCharacters` is overwritten with spaces (one space
 * per UTF-16 code unit, so the length of the text does not change), and the
 * result is matched against `regExWords` a second time. Entries whose text is
 * empty are dropped.
 *
 * @param text - the text to scan, together with its offset.
 * @returns a sequence of the words found, as produced by `matchToTextOffset`.
 */
export function extractWordsFromTextOffset(text: TextOffset): Sequence<TextOffset> {
    // Two independent copies of the pattern, one per scan.
    // NOTE(review): presumably so the chained scans do not share `lastIndex`
    // state on a global regex — confirm against matchToTextOffset.
    const reg = XRegExp(regExWords);
    const reg2 = XRegExp(regExWords);
    return matchToTextOffset(reg, text)
        // remove characters that match against \p{L} but are not letters (Chinese characters are an example).
        .map(({ text: matched, offset }) => ({
            text: XRegExp.replace(matched, regExIgnoreCharacters, (match: string) => ' '.repeat(match.length)),
            offset,
        }))
        .concatMap((wo) => matchToTextOffset(reg2, wo))
        .filter((wo) => !!wo.text);
}
uri?: string;
doc: string;
row: number;
col: number;
}
// Any run of non-newline characters together with the line terminator (\n or \r\n)
// or the end of input that follows it.
const regExLines = /.*(\r?\n|$)/g;
// const regExIdentifiers = XRegExp('(?:\\p{L}|[0-9_\'])+', 'gi');
// A run of upper-case letters, an optional backslash, an optional apostrophe
// (' or ’), then one of the lower-case suffixes s / ing / ies / es / ings / ed / ning —
// but only when no lower-case letter follows the suffix.
const regExUpperSOrIng = XRegExp('(\\p{Lu}+\\\\?[\'’]?(?:s|ing|ies|es|ings|ed|ning))(?!\\p{Ll})', 'g');
// A lower-case letter immediately followed by an upper-case letter (captured separately).
const regExSplitWords = XRegExp('(\\p{Ll})(\\p{Lu})', 'g');
// An upper-case letter immediately followed by an upper-case + lower-case pair
// (captured as two groups).
const regExSplitWords2 = XRegExp('(\\p{Lu})(\\p{Lu}\\p{Ll})', 'g');
// A word: a letter followed by one or more letters, where a letter may be preceded
// by an apostrophe (' or ’, itself optionally preceded by a backslash); or a single letter.
const regExWords = XRegExp("\\p{L}(?:\\\\?['’]\\p{L}|\\p{L})+|\\p{L}", 'g');
// Characters in the Hiragana, Han, Katakana or Hangul scripts, or in U+30A0–U+30FF.
const regExIgnoreCharacters = XRegExp('\\p{Hiragana}|\\p{Han}|\\p{Katakana}|[\\u30A0-\\u30FF]|[\\p{Hangul}]', 'g');
// Whole string is one upper-case letter followed by one or more lower-case letters.
const regExFirstUpper = XRegExp('^\\p{Lu}\\p{Ll}+$');
// Whole string consists of upper-case letters only.
const regExAllUpper = XRegExp('^\\p{Lu}+$');
// Whole string consists of lower-case letters only.
const regExAllLower = XRegExp('^\\p{Ll}+$');
// A string written as /pattern/flags; captures the pattern and the flags (any of g, i, m, u, y).
const regExMatchRegExParts = /^\/(.*)\/([gimuy]*)$/;
// Any character in the Unicode Mark category (\p{M}).
const regExAccents = XRegExp('\\p{M}', 'g');
/**
 * Split a camelCase word into its parts and give each part its own offset.
 *
 * @param wo - the word to split, together with the offset at which it starts.
 * @returns the parts produced by `splitCamelCaseWord`, each paired with an
 *          offset equal to the previous entry's offset plus the previous
 *          entry's text length.
 */
export function splitCamelCaseWordWithOffset(wo: TextOffset): Array<TextOffset> {
    return splitCamelCaseWord(wo.text)
        .map(scanMap(
            (last, text) => ({ text, offset: last.offset + last.text.length }),
            // Seed entry with empty text. NOTE(review): assumes scanMap passes this
            // seed as `last` for the first part, so that part starts at wo.offset.
            { text: '', offset: wo.offset }
        ));
}
/**
* Split camelCase words into an array of strings.
*/
import { xregexp as XRegExp } from 'cspell-util-bundle';
import { genSequence, Sequence } from 'gensequence';
import * as Text from './text';
import * as path from 'path';
import { mkdirp } from 'fs-extra';
import * as Trie from 'cspell-trie-lib';
import { writeSeqToFile } from './fileWriter';
import { uniqueFilter } from 'hunspell-reader/dist/util';
// One or more characters that are neither a letter (\p{L}), an apostrophe, nor a space.
const regNonWordOrSpace = XRegExp("[^\\p{L}' ]+", 'gi');
// A run of whitespace, or a run of dashes.
const regExpSpaceOrDash = /(?:\s+)|(?:-+)/g;
// Any character followed by three or more repeats of itself (four or more in a row),
// compared case-insensitively.
const regExpRepeatChars = /(.)\1{3,}/i;
/** Shape of a logging function; takes the same arguments as `console.log`. */
export type Logger = (message?: any, ...optionalParams: any[]) => void;

/** The logger currently in use. Starts out as `defaultLogger`. */
let log: Logger = defaultLogger;

/**
 * Choose the logger used by this module.
 *
 * @param logger - the logger to install; when omitted (or nullish) the
 *                 default console-backed logger is restored.
 */
export function setLogger(logger?: Logger) {
    log = logger == null ? defaultLogger : logger;
}

/** Default logger: passes every argument straight through to `console.log`. */
function defaultLogger(message?: any, ...optionalParams: any[]) {
    const args = [message, ...optionalParams];
    console.log(...args);
}
export function normalizeWords(lines: Sequence) {