Skip to content

Commit 41152e1

Browse files
committed May 5, 2021
fix: Correctly handle leading and trailing single quote
1 parent 61bcd67 commit 41152e1

File tree

4 files changed

+21
-9
lines changed

4 files changed

+21
-9
lines changed
 

‎packages/cspell-lib/src/util/text.test.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ describe('Validate individual regexp', () => {
300300
test.each`
301301
testName | regexp | text | expectedResult
302302
${'regExWordsAndDigits'} | ${regExWordsAndDigits} | ${''} | ${[]}
303-
${'regExWordsAndDigits'} | ${regExWordsAndDigits} | ${" x = 'Don\\'t'"} | ${['x', 1, "Don\\'t", 6]}
303+
${'regExWordsAndDigits'} | ${regExWordsAndDigits} | ${" x = 'Don\\'t'"} | ${['x', 1, "'Don\\'t'", 5]}
304304
${'regExWordsAndDigits'} | ${regExWordsAndDigits} | ${'12345'} | ${[]}
305305
${'regExWordsAndDigits'} | ${regExWordsAndDigits} | ${'12345a'} | ${['12345a', 0]}
306306
${'regExWordsAndDigits'} | ${regExWordsAndDigits} | ${'b12345'} | ${['b12345', 0]}

‎packages/cspell-lib/src/util/textRegex.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ export const regExUpperSOrIng = /(\p{Lu}+\\?['’]?(?:s|ing|ies|es|ings|ed|ning)
55
export const regExSplitWords = /(\p{Ll})(\p{Lu})/gu;
66
export const regExSplitWords2 = /(\p{Lu})(\p{Lu}\p{Ll})/gu;
77
export const regExWords = /\p{L}(?:(?:\\?['])?\p{L})*/gu;
8-
export const regExWordsAndDigits = /(?:\d+)?[\p{L}_-](?:(?:\\?['])?[\p{L}\w-])*/gu;
8+
export const regExWordsAndDigits = /(?:\d+)?[\p{L}_'-](?:(?:\\?['])?[\p{L}\w'-])*/gu;
99
export const regExIgnoreCharacters = /\p{sc=Hiragana}|\p{sc=Han}|\p{sc=Katakana}|[\u30A0-\u30FF]|[\p{sc=Hangul}]/gu;
1010
export const regExFirstUpper = /^\p{Lu}\p{Ll}+$/u;
1111
export const regExAllUpper = /^\p{Lu}+$/u;

‎packages/cspell-lib/src/util/wordSplitter.test.ts

+17-7
Original file line numberDiff line numberDiff line change
@@ -65,26 +65,27 @@ describe('Validate wordSplitter', () => {
6565
expect(r).toMatchSnapshot(); // Use snapshots to ensure all possible options are generated.
6666
});
6767

68-
interface PartialTextOffsetWithValid {
68+
interface PartialTextOffsetWithIsFound {
6969
text: string;
7070
offset?: number;
7171
isFound?: boolean;
7272
}
7373

7474
interface TestSplit {
7575
text: string;
76-
expectedWords: PartialTextOffsetWithValid[];
76+
expectedWords: PartialTextOffsetWithIsFound[];
7777
}
7878

79-
function tov(p: PartialTextOffsetWithValid | string, isValid = true): PartialTextOffsetWithValid {
79+
/** to PartialTextOffsetWithIsFound */
80+
function tov(p: PartialTextOffsetWithIsFound | string, defaultIsFound = true): PartialTextOffsetWithIsFound {
8081
if (typeof p === 'string') {
8182
p = { text: p };
8283
}
83-
const { isFound = isValid } = p;
84+
const { isFound = defaultIsFound } = p;
8485
return { ...p, isFound };
8586
}
8687

87-
function splitTov(t: string): PartialTextOffsetWithValid[] {
88+
function splitTov(t: string): PartialTextOffsetWithIsFound[] {
8889
if (!t) return [];
8990
const parts = t.split('|');
9091
return parts.map((p) => tov(p, has({ text: p, offset: 0 })));
@@ -173,11 +174,13 @@ describe('Validate wordSplitter', () => {
173174
${'nstatic'} | ${'static'} | ${1}
174175
${'techo'} | ${'echo'} | ${1}
175176
${`n'cpp`} | ${'cpp'} | ${1}
176-
${`n'log`} | ${'log'} | ${4}
177+
${`n'log`} | ${'log'} | ${7}
177178
${'64-bit'} | ${'bit'} | ${1}
178179
${'128-bit'} | ${'bit'} | ${1}
179180
${'256-sha'} | ${'256-sha'} | ${6}
180181
${`REFACTOR'd`} | ${'REFACTOR'} | ${2}
182+
${`dogs'`} | ${`dogs'`} | ${2}
183+
${`planets’`} | ${`planets’`} | ${2}
181184
`('split `$text` in doc', ({ text, expectedWords, calls }: TestSplit2) => {
182185
const expectedWordSegments = splitTov(expectedWords);
183186
const doc = sampleText();
@@ -318,6 +321,10 @@ function sampleWordSet() {
318321
well educated
319322
words separated by singleQuote
320323
256-sha
324+
dogs'
325+
leashes
326+
writers
327+
planets’
321328
`
322329
.split(/\s+/g)
323330
.map((a) => a.trim())
@@ -343,8 +350,11 @@ function sampleText() {
343350
344351
256-sha
345352
346-
128-bit values
353+
- The dogs' leashes (multiple dogs).
354+
- The writers' desks (multiple writers).
355+
- The planets’ atmospheres (multiple planets).
347356
357+
128-bit values
348358
349359
`;
350360
}

‎packages/cspell-lib/src/util/wordSplitter.ts

+2
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ function genSymbolBreaks(line: LineSegment): SortedBreaks[] {
265265
return [
266266
calcBreaksForRegEx(line, regExPossibleWordBreaks, calcBreaks),
267267
calcBreaksForRegEx(line, /\d+/g, calcBreaks),
268+
calcBreaksForRegEx(line, /['](?!\p{L})/gu, calcBreaks), // break on trailing '
269+
calcBreaksForRegEx(line, /(?<!\p{L})[']/gu, calcBreaks), // break on leading '
268270
calcBreaksForRegEx(line, regExEscapeCharacters, calcBreaks),
269271
];
270272
}

0 commit comments

Comments
 (0)
Please sign in to comment.