|
1 | 1 | import assertString from './util/assertString';
|
2 | 2 |
|
/*
  Regex source fragments, one per ABNF production of RFC 5646 section 2.1.
  They are plain strings so they can be composed with template literals and
  compiled once into `languageTagRegex` at the bottom of this section.
 */

/*
  extlang = 3ALPHA              ; selected ISO 639 codes
            *2("-" 3ALPHA)      ; permanently reserved
 */
const extlang = '([A-Za-z]{3}(-[A-Za-z]{3}){0,2})';

/*
  language = 2*3ALPHA           ; shortest ISO 639 code
             ["-" extlang]      ; sometimes followed by
                                ; extended language subtags
           / 4ALPHA             ; or reserved for future use
           / 5*8ALPHA           ; or registered language subtag

  The 4ALPHA and 5*8ALPHA alternatives are folded into a single {4,8} range.
 */
const language = `(([a-zA-Z]{2,3}(-${extlang})?)|([a-zA-Z]{4,8}))`;

/*
  script = 4ALPHA               ; ISO 15924 code
 */
const script = '([A-Za-z]{4})';

/*
  region = 2ALPHA               ; ISO 3166-1 code
         / 3DIGIT               ; UN M.49 code
 */
const region = '([A-Za-z]{2}|\\d{3})';

/*
  variant = 5*8alphanum         ; registered variants
          / (DIGIT 3alphanum)

  The character class must not contain "-": a hyphen is a subtag separator,
  never part of a variant.
 */
const variant = '([A-Za-z0-9]{5,8}|(\\d[A-Za-z0-9]{3}))';

/*
  singleton = DIGIT             ; 0 - 9
            / %x41-57           ; A - W
            / %x59-5A           ; Y - Z
            / %x61-77           ; a - w
            / %x79-7A           ; y - z

  "x"/"X" is deliberately excluded: it introduces a private-use sequence.
 */
const singleton = '(\\d|[A-W]|[Y-Z]|[a-w]|[y-z])';

/*
  extension = singleton 1*("-" (2*8alphanum))
                                ; Single alphanumerics
                                ; "x" reserved for private use
 */
const extension = `(${singleton}(-[A-Za-z0-9]{2,8})+)`;

/*
  privateuse = "x" 1*("-" (1*8alphanum))
 */
const privateuse = '(x(-[A-Za-z0-9]{1,8})+)';

// irregular tags do not match the 'langtag' production and would not
// otherwise be considered 'well-formed'. These tags are all valid, but
// most are deprecated in favor of more modern subtags or subtag combinations
const irregularTags = [
  'en-GB-oed', 'i-ami', 'i-bnn', 'i-default', 'i-enochian',
  'i-hak', 'i-klingon', 'i-lux', 'i-mingo', 'i-navajo', 'i-pwn', 'i-tao',
  'i-tay', 'i-tsu', 'sgn-BE-FR', 'sgn-BE-NL', 'sgn-CH-DE',
];
const irregular = `(${irregularTags.join('|')})`;

// regular tags match the 'langtag' production, but their subtags are not
// extended language or variant subtags: their meaning is defined by
// their registration and all of these are deprecated in favor of a more
// modern subtag or sequence of subtags
const regularTags = [
  'art-lojban', 'cel-gaulish', 'no-bok', 'no-nyn', 'zh-guoyu',
  'zh-hakka', 'zh-min', 'zh-min-nan', 'zh-xiang',
];
const regular = `(${regularTags.join('|')})`;

/*
  grandfathered = irregular     ; non-redundant tags registered
                / regular       ; during the RFC 3066 era
 */
const grandfathered = `(${irregular}|${regular})`;

/*
  RFC 5646 defines delimitation of subtags via a hyphen:

    "Subtag" refers to a specific section of a tag, delimited by a
    hyphen, such as the subtags 'zh', 'Hant', and 'CN' in the tag "zh-
    Hant-CN".  Examples of subtags in this document are enclosed in
    single quotes ('Hant')

  However, we need to add "_" to maintain the existing behaviour.
  Only the separators between top-level langtag parts accept "_"; the
  separators inside extlang, extension and privateuse remain "-".
 */
const delimiter = '[-_]';

/*
  langtag = language
            ["-" script]
            ["-" region]
            *("-" variant)
            *("-" extension)
            ["-" privateuse]
 */
const langtag = [
  language,
  `(${delimiter}${script})?`,
  `(${delimiter}${region})?`,
  `(${delimiter}${variant})*`,
  `(${delimiter}${extension})*`,
  `(${delimiter}${privateuse})?`,
].join('');

/*
  Regex implementation based on BCP RFC 5646
  Tags for Identifying Languages
  https://www.rfc-editor.org/rfc/rfc5646.html

  Language-Tag = langtag / privateuse / grandfathered

  The whole input must match one alternative. The "i" flag is used because
  RFC 5646 section 2.1.1 treats tags as case-insensitive; without it
  "X-private" or "I-KLINGON" would be rejected. No "g"/"y" flag is set, so
  the shared instance keeps no lastIndex state between .test() calls.
 */
const languageTagRegex = new RegExp(`^(${privateuse}|${grandfathered}|${langtag})$`, 'i');
4 | 107 |
|
/**
 * Check whether a string is a well-formed locale / language tag.
 *
 * The decision is delegated entirely to `languageTagRegex`. The former
 * hard-coded exceptions for 'en_US_POSIX' and 'ca_ES_VALENCIA' are gone:
 * 'POSIX' and 'VALENCIA' are 5-8 character alphanumeric subtags, which the
 * variant rule of the pattern accepts.
 *
 * @param {string} str - Value to validate.
 * @returns {boolean} `true` when `str` matches `languageTagRegex`, else `false`.
 */
export default function isLocale(str) {
  // NOTE(review): assertString is imported from ./util/assertString and is
  // presumably what rejects non-string input — confirm against that module.
  assertString(str);
  return languageTagRegex.test(str);
}
|
0 commit comments