Skip to content

Commit 14bf407

Browse files
committedAug 14, 2022
Add new ENS normalization specification for wider UTF-8 support (#42, #2376, #2754).

File tree

7 files changed

+448
-13
lines changed

7 files changed

+448
-13
lines changed
 

‎packages/hash/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"dependencies": {
44
"@ethersproject/abstract-signer": "^5.6.2",
55
"@ethersproject/address": "^5.6.1",
6+
"@ethersproject/base64": "^5.6.2",
67
"@ethersproject/bignumber": "^5.6.2",
78
"@ethersproject/bytes": "^5.6.1",
89
"@ethersproject/keccak256": "^5.6.1",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
/**
2+
* MIT License
3+
*
4+
* Copyright (c) 2021 Andrew Raffensperger
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*
24+
* This is a near carbon-copy of the original source (link below) with the
25+
* TypeScript typings added and a few tweaks to make it ES3-compatible.
26+
*
27+
* See: https://github.com/adraffy/ens-normalize.js
28+
*/
29+
30+
export type Numbers = Uint8Array | Array<number>;
31+
export type NextFunc = (...args: Array<any>) => number;
32+
33+
// https://github.com/behnammodi/polyfill/blob/master/array.polyfill.js
34+
/**
 *  Returns a new array with nested arrays in %%array%% flattened up to
 *  %%depth%% levels (default: 1). Minimal stand-in for Array.prototype.flat.
 *
 *  Entries are visited with Array.prototype.forEach, so holes in sparse
 *  arrays are skipped.
 */
function flat(array: Array<any>, depth?: number): Array<any> {
    const flattened: Array<any> = [];

    function collect(items: Array<any>, remaining: number): void {
        Array.prototype.forEach.call(items, function (item: any) {
            const descend = (remaining > 0 && Array.isArray(item));
            if (!descend) {
                flattened.push(item);
                return;
            }
            collect(item, remaining - 1);
        });
    }

    collect(array, (depth == null) ? 1 : depth);
    return flattened;
}
53+
54+
/**
 *  Builds a plain object from a list of [ key, value ] pairs. When a key
 *  occurs more than once, the last pair wins. Minimal stand-in for
 *  Object.fromEntries.
 */
function fromEntries<T extends string | number | symbol = string | number | symbol, U = any>(array: Array<[T, U]>): Record<T, U> {
    const result = {} as Record<T, U>;
    for (const entry of array) {
        const key = entry[0];
        result[key] = entry[1];
    }
    return result;
}
62+
63+
/**
 *  Decodes an arithmetic-coded byte stream into an array of numbers.
 *
 *  Layout of %%bytes%% as read by this function:
 *    - u16 symbol count
 *    - (symbol count - 1) u16 frequencies (the first symbol is implicitly
 *      given frequency 1 and acts as the end-of-stream marker)
 *    - u16 payload length, followed by that many payload bytes
 *    - the arithmetic-coded bit stream
 *
 *  The last three symbols are escapes whose values are read from the payload
 *  as 1, 2 or 3 bytes; every other symbol ``x`` decodes to ``x - 1``.
 */
export function decode_arithmetic(bytes: Numbers): Array<number> {
    let cursor = 0;
    const readU16 = function (): number {
        return (bytes[cursor++] << 8) | bytes[cursor++];
    };

    // Cumulative frequency table; entry 0..1 is the end marker (frequency 1)
    const symbolCount = readU16();
    let total = 1;
    const cumulative = [0, 1];
    for (let i = 1; i < symbolCount; i++) {
        total += readU16();
        cumulative.push(total);
    }

    // Remember where the sized payload starts and step over it; the escape
    // symbols index into it once the bit stream has been decoded
    const payloadLength = readU16();
    let payloadCursor = cursor;
    cursor += payloadLength;

    let bitsLeft = 0;
    let bitBuffer = 0;
    const readBit = function (): number {
        if (bitsLeft == 0) {
            // Reading past the end yields undefined, and (x | undefined)
            // is (x | 0), so the stream is effectively zero-padded
            bitBuffer = (bitBuffer << 8) | bytes[cursor++];
            bitsLeft = 8;
        }
        bitsLeft -= 1;
        return (bitBuffer >> bitsLeft) & 1;
    };

    const N = 31;
    const FULL = 2 ** N;
    const HALF = FULL >>> 1;
    const QRTR = HALF >> 1;
    const MASK = FULL - 1;

    // Prime the code register with the first N bits
    let code = 0;
    for (let i = 0; i < N; i++) {
        code = (code << 1) | readBit();
    }

    const symbols: Array<number> = [];
    let low = 0;
    let range = FULL; // kept as a float; it can equal 2**31
    for (;;) {
        const value = Math.floor((((code - low + 1) * total) - 1) / range);

        // Binary search for the symbol whose cumulative interval holds value
        let lo = 0;
        let hi = symbolCount;
        while (hi - lo > 1) {
            const mid = (lo + hi) >>> 1;
            if (value < cumulative[mid]) {
                hi = mid;
            } else {
                lo = mid;
            }
        }

        // The first symbol is the end mark
        if (lo == 0) { break; }
        symbols.push(lo);

        let a = low + Math.floor(range * cumulative[lo] / total);
        let b = low + Math.floor(range * cumulative[lo + 1] / total) - 1;

        // Shift out leading bits while both bounds agree on the top bit
        while (((a ^ b) & HALF) == 0) {
            code = (code << 1) & MASK | readBit();
            a = (a << 1) & MASK;
            b = (b << 1) & MASK | 1;
        }

        // Underflow handling: bounds straddle the midpoint in the second bit
        while (a & ~b & QRTR) {
            code = (code & HALF) | ((code << 1) & (MASK >>> 1)) | readBit();
            a = (a << 1) ^ HALF;
            b = ((b ^ HALF) << 1) | HALF | 1;
        }

        low = a;
        range = 1 + b - a;
    }

    // Resolve escape symbols against the payload
    const offset = symbolCount - 4;
    return symbols.map(function (symbol) {
        const escape = symbol - offset;
        if (escape === 3) {
            return offset + 0x10100 + ((bytes[payloadCursor++] << 16) | (bytes[payloadCursor++] << 8) | bytes[payloadCursor++]);
        }
        if (escape === 2) {
            return offset + 0x100 + ((bytes[payloadCursor++] << 8) | bytes[payloadCursor++]);
        }
        if (escape === 1) {
            return offset + bytes[payloadCursor++];
        }
        return symbol - 1;
    });
}
144+
145+
146+
/**
 *  Wraps %%v%% in a reader function: each call returns the next entry of
 *  %%v%% and advances an internal position. Calls made after the end of
 *  %%v%% return undefined.
 */
export function read_payload(v: Numbers): NextFunc {
    let index = 0;
    return function () {
        const current = v[index];
        index += 1;
        return current;
    };
}
151+
/**
 *  Arithmetic-decodes %%bytes%% and returns a reader over the decoded symbols.
 */
export function read_compressed_payload(bytes: Numbers): NextFunc {
    const symbols = decode_arithmetic(bytes);
    return read_payload(symbols);
}
154+
155+
/**
 *  Maps an unsigned zig-zag index to a signed value: even inputs become
 *  non-negative and odd inputs become negative.
 *
 *  eg. [0,1,2,3...] => [0,-1,1,-2,...]
 */
export function signed(i: number): number {
    if ((i & 1) !== 0) {
        return (~i) >> 1;
    }
    return i >> 1;
}
159+
160+
/**
 *  Reads %%n%% values from %%next%% and returns each of them plus one.
 */
function read_counts(n: number, next: NextFunc): Array<number> {
    const counts = new Array<number>(n);
    let i = 0;
    while (i < n) {
        counts[i] = next() + 1;
        i += 1;
    }
    return counts;
}
165+
166+
/**
 *  Reads %%n%% gap values from %%next%% and returns the strictly ascending
 *  sequence they describe: starting from -1, each entry is the previous
 *  entry plus one plus the gap read.
 */
function read_ascending(n: number, next: NextFunc): Array<number> {
    const values = new Array<number>(n);
    let running = -1;
    for (let i = 0; i < n; i++) {
        running += 1 + next();
        values[i] = running;
    }
    return values;
}
171+
172+
/**
 *  Reads %%n%% zig-zag encoded deltas from %%next%% and returns their
 *  running sum, starting from 0.
 */
function read_deltas(n: number, next: NextFunc): Array<number> {
    const values = new Array<number>(n);
    let running = 0;
    for (let i = 0; i < n; i++) {
        running += signed(next());
        values[i] = running;
    }
    return values;
}
177+
178+
/**
 *  Reads a set of members from %%next%%, stored as:
 *    - a count followed by that many ascending single values
 *    - a run count ``n``, then ``n`` ascending run starts and ``n`` run
 *      lengths; each run contributes ``start, start + 1, ...``
 *
 *  Singles come first in the result, followed by the expanded runs. If
 *  %%lookup%% is given, every member is translated through it.
 */
export function read_member_array(next: NextFunc, lookup?: Record<number, number>) {
    const members = read_ascending(next(), next);

    const runCount = next();
    const runStarts = read_ascending(runCount, next);
    const runLengths = read_counts(runCount, next);

    for (let run = 0; run < runCount; run++) {
        const start = runStarts[run];
        for (let step = 0; step < runLengths[run]; step++) {
            members.push(start + step);
        }
    }

    if (lookup) {
        return members.map((member) => lookup[member]);
    }
    return members;
}
190+
191+
/**
 *  Reads the mapping tables from %%next%% and returns them as a single
 *  record from a source value to its list of replacement values.
 *
 *  The stream holds two groups of tables:
 *    - linear tables, each introduced by a non-zero width and terminated
 *      by a width of 0: [x, ys, n, dx, dy] => linear map
 *    - replacement tables, each introduced by (width + 1) and terminated
 *      by 0: [x, ys] => single replacement rule
 */
export function read_mapped_map(next: NextFunc): Record<number, Array<number>> {
    const tables = [];

    // Linear tables; a width of 0 ends the group
    for (let w = next(); w != 0; w = next()) {
        tables.push(read_linear_table(w, next));
    }

    // Replacement tables; widths are stored off-by-one so 0 ends the group
    for (let w = next() - 1; !(w < 0); w = next() - 1) {
        tables.push(read_replacement_table(w, next));
    }

    // Each table is a list of [ x, ys ] entries; merge them into one list
    const entries = flat(tables);
    return fromEntries<number, Array<number>>(entries);
}
208+
209+
/**
 *  Reads values from %%next%% up to (and consuming) the first 0, returning
 *  the values that preceded it.
 */
export function read_zero_terminated_array(next: NextFunc): Array<number> {
    const values = [];
    for (let value = next(); value != 0; value = next()) {
        values.push(value);
    }
    return values;
}
218+
219+
/**
 *  Reads %%w%% columns of %%n%% delta-encoded values each and returns them
 *  transposed: %%n%% rows, each holding %%w%% values. If %%lookup%% is
 *  given, every value is passed through it before being stored.
 */
function read_transposed(n: number, w: number, next: NextFunc, lookup?: NextFunc): Array<Array<number>> {
    const rows: Array<Array<number>> = new Array(n);
    for (let row = 0; row < n; row++) {
        rows[row] = [];
    }

    for (let column = 0; column < w; column++) {
        const values = read_deltas(n, next);
        for (let row = 0; row < values.length; row++) {
            const value = values[row];
            rows[row].push(lookup ? lookup(value) : value);
        }
    }

    return rows;
}
226+
227+
228+
/**
 *  Reads one linear mapping table whose targets are %%w%% values wide and
 *  expands it into a flat list of [ x, ys ] entries.
 *
 *  The table stores a source step ``dx``, a target step ``dy``, a
 *  zero-terminated list of run lengths, and one [ x, ...ys ] row per run.
 *  The j-th entry of a run maps ``x + j * dx`` to every ``y + j * dy``.
 */
function read_linear_table(w: number, next: NextFunc): Array<Array<number | Array<number>>> {
    const dx = 1 + next();
    const dy = next();
    const runLengths = read_zero_terminated_array(next);
    const rows = read_transposed(runLengths.length, 1 + w, next);

    const entries: Array<Array<number | Array<number>>> = [];
    for (let i = 0; i < rows.length; i++) {
        const x = rows[i][0];
        const ys = rows[i].slice(1);

        // Allocate by length first so an invalid run length is rejected
        // the same way Array(length) would reject it
        const run: Array<Array<number | Array<number>>> = new Array(runLengths[i]);
        for (let j = 0; j < run.length; j++) {
            const shift = j * dy;
            run[j] = [x + j * dx, ys.map((y) => y + shift)];
        }

        for (let j = 0; j < run.length; j++) {
            entries.push(run[j]);
        }
    }
    return entries;
}
243+
244+
/**
 *  Reads one replacement table whose targets are %%w%% values wide. The row
 *  count is stored off-by-one. Each row [ x, ...ys ] becomes the entry
 *  [ x, ys ].
 */
function read_replacement_table(w: number, next: NextFunc): Array<[ number, Array<number> ]> {
    const rowCount = 1 + next();
    const rows = read_transposed(rowCount, 1 + w, next);
    return rows.map((row): [ number, Array<number> ] => {
        const source = row[0];
        const targets = row.slice(1);
        return [ source, targets ];
    });
}
249+
250+
/**
 *  An edge of the emoji trie: taking it requires the next code point to be
 *  a member of %%set%%, and leads to %%node%%.
 */
export type Branch = {
    set: Set<number>;
    node: Node;
};

/**
 *  A node of the emoji trie.
 */
export type Node = {
    // Outgoing edges; read_emoji_trie orders them by descending set size
    branches: Branch[];

    // The code points consumed to reach this node form a complete emoji
    valid: boolean;

    // An optional 0xFE0F may follow the code point that led to this node
    fe0f: boolean;

    // NOTE(review): decoded from flag bit 4 but not used in the code
    // visible here -- confirm semantics against upstream ens-normalize.js
    save: boolean;

    // NOTE(review): decoded from flag bit 8 but not used in the code
    // visible here -- confirm semantics against upstream ens-normalize.js
    check: boolean;
};
262+
263+
/**
 *  Reads the emoji trie from %%next%% and returns its root node.
 *
 *  The stream starts with a member array of every value used in the trie.
 *  Each node then follows recursively as a list of branches (a member array
 *  of indices into the sorted values, followed by the child node),
 *  terminated by an empty member array, and finally a flag value whose bits
 *  populate ``valid`` (1), ``fe0f`` (2), ``save`` (4) and ``check`` (8).
 */
export function read_emoji_trie(next: NextFunc): Node {
    const sorted = read_member_array(next).sort((a, b) => a - b);

    function readNode(): Node {
        const branches = [];
        for (let keys = read_member_array(next); keys.length != 0; keys = read_member_array(next)) {
            // Keys are indices into the sorted value list
            const members = new Set(keys.map((index) => sorted[index]));
            branches.push({ set: members, node: readNode() });
        }

        // Largest sets first
        branches.sort((a, b) => b.set.size - a.set.size);

        const flag = next();
        const hasBit = (bit: number): boolean => ((flag & bit) != 0);
        return {
            branches,
            valid: hasBit(1),
            fe0f: hasBit(2),
            save: hasBit(4),
            check: hasBit(8),
        };
    }

    return readNode();
}

‎packages/hash/src.ts/ens-normalize/include.ts

+37
Large diffs are not rendered by default.
+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/**
2+
* MIT License
3+
*
4+
* Copyright (c) 2021 Andrew Raffensperger
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*
24+
* This is a near carbon-copy of the original source (link below) with the
25+
* TypeScript typings added and a few tweaks to make it ES3-compatible.
26+
*
27+
* See: https://github.com/adraffy/ens-normalize.js
28+
*/
29+
30+
import { toUtf8CodePoints } from "@ethersproject/strings";
31+
32+
import { getData } from './include.js';
33+
const r = getData();
34+
35+
import {read_member_array, read_mapped_map, read_emoji_trie} from './decoder.js';
36+
37+
import type { Node } from "./decoder.js";
38+
39+
// @TODO: This should be lazily loaded

// NOTE: all four tables are read from the same reader `r`, one after the
// other, so the order of these declarations must not change.
// (assumes getData() returns a stateful reader -- confirm in include.ts)

// Values that are kept as-is by ens_normalize
const VALID: Set<number> = new Set(read_member_array(r));

// Values that are silently dropped by ens_normalize
const IGNORED: Set<number> = new Set(read_member_array(r));

// Values that ens_normalize replaces with a list of other values
const MAPPED: Record<number, Array<number>> = read_mapped_map(r);

// Root of the trie used to match emoji sequences
const EMOJI_ROOT: Node = read_emoji_trie(r);

//const NFC_CHECK = new Set(read_member_array(r, Array.from(VALID.values()).sort((a, b) => a - b)));
46+
47+
/**
 *  Returns %%s%% in Unicode Normalization Form C.
 */
function nfc(s: string): string {
    const composed = s.normalize('NFC');
    return composed;
}
50+
51+
/**
 *  Returns a copy of %%cps%% with every 0xFE0F entry removed.
 */
function filter_fe0f(cps: Array<number>): Array<number> {
    const kept: Array<number> = [];
    cps.forEach((cp) => {
        if (cp != 0xFE0F) { kept.push(cp); }
    });
    return kept;
}
54+
55+
/**
 *  Normalizes an ENS %%name%%.
 *
 *  The name is processed one code point at a time:
 *    - a sequence matched by the emoji trie is emitted as a unit; unless
 *      %%beautify%% is set, 0xFE0F is stripped from it
 *    - a code point in VALID is emitted unchanged
 *    - a code point in IGNORED is dropped
 *    - a code point in MAPPED is replaced by its mapped code points
 *    - anything else is rejected
 *
 *  The result is returned in Unicode Normalization Form C.
 *
 *  @param name - the name to normalize
 *  @param beautify - keep 0xFE0F inside matched emoji (default: false)
 *  @returns the normalized name
 *  @throws Error if the name contains a disallowed code point
 */
export function ens_normalize(name: string, beautify = false): string {
    const input = toUtf8CodePoints(name).reverse(); // flip for pop
    const output: Array<number> = [];
    while (input.length) {
        const emoji = consume_emoji_reversed(input, EMOJI_ROOT);
        if (emoji) {
            output.push(...(beautify ? emoji : filter_fe0f(emoji)));
            continue;
        }

        // consume_emoji_reversed only shortens input when it returns a
        // match, so input is still non-empty here
        const cp = <number>input.pop();
        if (VALID.has(cp)) {
            output.push(cp);
            continue;
        }
        if (IGNORED.has(cp)) {
            continue;
        }
        const cps = MAPPED[cp];
        if (cps) {
            output.push(...cps);
            continue;
        }
        throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`);
    }

    // Convert one code point at a time; spreading the whole output into a
    // single String.fromCodePoint call passes one argument per code point
    // and can exceed the engine's argument limit for very long names
    const text = output.map((cp) => String.fromCodePoint(cp)).join("");
    return nfc(text);
}
81+
82+
83+
/**
 *  Tries to match the longest emoji sequence at the END of %%cps%% (the
 *  input is stored reversed, so the end of the array is the front of the
 *  text), walking the trie from %%node%%.
 *
 *  On a match, the matched code points are removed from %%cps%% and the
 *  emoji is returned in forward order, with 0xFE0F inserted after every
 *  code point whose trie node allows it. If %%eaten%% is given, it is
 *  cleared and then filled with the code points actually consumed from
 *  the input, in forward order. Returns undefined (leaving %%cps%%
 *  untouched) when nothing matches.
 */
function consume_emoji_reversed(cps: Array<number>, node: Node, eaten?: Array<number>) {
    let matched: Array<number> | undefined;
    const walked: Array<number> = [];
    let pos = cps.length;
    if (eaten) { eaten.length = 0; } // clear input buffer (if needed)

    while (pos > 0) {
        pos -= 1;
        const cp = cps[pos];

        const branch = node.branches.find((candidate) => candidate.set.has(cp));
        if (branch == null || !branch.node) { break; }
        node = branch.node;

        walked.push(cp);
        if (node.fe0f) {
            // Always emit the selector; swallow it from the input if present
            walked.push(0xFE0F);
            const selectorFollows = (pos > 0 && cps[pos - 1] == 0xFE0F);
            if (selectorFollows) { pos -= 1; }
        }

        if (!node.valid) { continue; }

        // This is a valid emoji (so far); keep going for a longer match
        matched = walked.slice();
        if (eaten) {
            const consumed = cps.slice(pos).reverse();
            eaten.push(...consumed);
        }
        cps.length = pos; // truncate
    }

    return matched;
}
107+

‎packages/hash/src.ts/index.ts

+4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import { id } from "./id";
44
import { dnsEncode, isValidName, namehash } from "./namehash";
55
import { hashMessage, messagePrefix } from "./message";
66

7+
import { ens_normalize as ensNormalize } from "./ens-normalize/lib";
8+
79
import { TypedDataEncoder as _TypedDataEncoder } from "./typed-data";
810

911
export {
@@ -13,6 +15,8 @@ export {
1315
namehash,
1416
isValidName,
1517

18+
ensNormalize,
19+
1620
messagePrefix,
1721
hashMessage,
1822

‎packages/hash/src.ts/namehash.ts

+14-12
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,21 @@
11
import { concat, hexlify } from "@ethersproject/bytes";
2-
import { nameprep, toUtf8Bytes } from "@ethersproject/strings";
2+
import { toUtf8Bytes } from "@ethersproject/strings";
33
import { keccak256 } from "@ethersproject/keccak256";
44

55
import { Logger } from "@ethersproject/logger";
66
import { version } from "./_version";
77
const logger = new Logger(version);
88

9+
import { ens_normalize } from "./ens-normalize/lib";
10+
911
const Zeros = new Uint8Array(32);
1012
Zeros.fill(0);
1113

1214
const Partition = new RegExp("^((.*)\\.)?([^.]+)$");
1315

1416
export function isValidName(name: string): boolean {
1517
try {
16-
const comps = name.split(".");
17-
for (let i = 0; i < comps.length; i++) {
18-
if (nameprep(comps[i]).length === 0) {
19-
throw new Error("empty")
20-
}
21-
}
22-
return true;
18+
return ens_normalize(name).length !== 0;
2319
} catch (error) { }
2420
return false;
2521
}
@@ -30,14 +26,14 @@ export function namehash(name: string): string {
3026
logger.throwArgumentError("invalid ENS name; not a string", "name", name);
3127
}
3228

33-
let current = name;
29+
let current = ens_normalize(name);
3430
let result: string | Uint8Array = Zeros;
3531
while (current.length) {
3632
const partition = current.match(Partition);
3733
if (partition == null || partition[2] === "") {
3834
logger.throwArgumentError("invalid ENS address; missing component", "name", name);
3935
}
40-
const label = toUtf8Bytes(nameprep(partition[3]));
36+
const label = toUtf8Bytes(partition[3]);
4137
result = keccak256(concat([result, keccak256(label)]));
4238

4339
current = partition[2] || "";
@@ -47,10 +43,16 @@ export function namehash(name: string): string {
4743
}
4844

4945
export function dnsEncode(name: string): string {
46+
name = ens_normalize(name)
5047
return hexlify(concat(name.split(".").map((comp) => {
48+
49+
// DNS does not allow components over 63 bytes in length
50+
if (toUtf8Bytes(comp).length > 63) {
51+
throw new Error("invalid DNS encoded entry; length exceeds 63 bytes");
52+
}
53+
5154
// We jam in an _ prefix to fill in with the length later
52-
// Note: Nameprep throws if the component is over 63 bytes
53-
const bytes = toUtf8Bytes("_" + nameprep(comp));
55+
const bytes = toUtf8Bytes("_" + comp);
5456
bytes[0] = bytes.length - 1;
5557
return bytes;
5658
}))) + "00";

‎packages/hash/tsconfig.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"exclude": [],
77
"extends": "../../tsconfig.package.json",
88
"include": [
9-
"./src.ts/*"
9+
"./src.ts/*",
10+
"./src.ts/ens-normalize/*"
1011
]
1112
}

0 commit comments

Comments
 (0)
Please sign in to comment.