@@ -49,6 +49,48 @@ function DBCSCodec(codecOptions, iconv) {
49
49
for ( var i = 0 ; i < mappingTable . length ; i ++ )
50
50
this . _addDecodeChunk ( mappingTable [ i ] ) ;
51
51
52
+ // Load & create GB18030 tables when needed.
53
+ if ( typeof codecOptions . gb18030 === 'function' ) {
54
+ this . gb18030 = codecOptions . gb18030 ( ) ; // Load GB18030 ranges.
55
+
56
+ // Add GB18030 common decode nodes.
57
+ var commonThirdByteNodeIdx = this . decodeTables . length ;
58
+ this . decodeTables . push ( UNASSIGNED_NODE . slice ( 0 ) ) ;
59
+
60
+ var commonFourthByteNodeIdx = this . decodeTables . length ;
61
+ this . decodeTables . push ( UNASSIGNED_NODE . slice ( 0 ) ) ;
62
+
63
+ // Fill out the tree
64
+ var firstByteNode = this . decodeTables [ 0 ] ;
65
+ for ( var i = 0x81 ; i <= 0xFE ; i ++ ) {
66
+ var secondByteNode = this . decodeTables [ NODE_START - firstByteNode [ i ] ] ;
67
+ for ( var j = 0x30 ; j <= 0x39 ; j ++ ) {
68
+ if ( secondByteNode [ j ] === UNASSIGNED ) {
69
+ secondByteNode [ j ] = NODE_START - commonThirdByteNodeIdx ;
70
+ } else if ( secondByteNode [ j ] > NODE_START ) {
71
+ throw new Error ( "gb18030 decode tables conflict at byte 2" ) ;
72
+ }
73
+
74
+ var thirdByteNode = this . decodeTables [ NODE_START - secondByteNode [ j ] ] ;
75
+ for ( var k = 0x81 ; k <= 0xFE ; k ++ ) {
76
+ if ( thirdByteNode [ k ] === UNASSIGNED ) {
77
+ thirdByteNode [ k ] = NODE_START - commonFourthByteNodeIdx ;
78
+ } else if ( thirdByteNode [ k ] === NODE_START - commonFourthByteNodeIdx ) {
79
+ continue ;
80
+ } else if ( thirdByteNode [ k ] > NODE_START ) {
81
+ throw new Error ( "gb18030 decode tables conflict at byte 3" ) ;
82
+ }
83
+
84
+ var fourthByteNode = this . decodeTables [ NODE_START - thirdByteNode [ k ] ] ;
85
+ for ( var l = 0x30 ; l <= 0x39 ; l ++ ) {
86
+ if ( fourthByteNode [ l ] === UNASSIGNED )
87
+ fourthByteNode [ l ] = GB18030_CODE ;
88
+ }
89
+ }
90
+ }
91
+ }
92
+ }
93
+
52
94
this . defaultCharUnicode = iconv . defaultCharUnicode ;
53
95
54
96
@@ -92,30 +134,6 @@ function DBCSCodec(codecOptions, iconv) {
92
134
this . defCharSB = this . encodeTable [ 0 ] [ iconv . defaultCharSingleByte . charCodeAt ( 0 ) ] ;
93
135
if ( this . defCharSB === UNASSIGNED ) this . defCharSB = this . encodeTable [ 0 ] [ '?' ] ;
94
136
if ( this . defCharSB === UNASSIGNED ) this . defCharSB = "?" . charCodeAt ( 0 ) ;
95
-
96
-
97
- // Load & create GB18030 tables when needed.
98
- if ( typeof codecOptions . gb18030 === 'function' ) {
99
- this . gb18030 = codecOptions . gb18030 ( ) ; // Load GB18030 ranges.
100
-
101
- // Add GB18030 decode tables.
102
- var thirdByteNodeIdx = this . decodeTables . length ;
103
- var thirdByteNode = this . decodeTables [ thirdByteNodeIdx ] = UNASSIGNED_NODE . slice ( 0 ) ;
104
-
105
- var fourthByteNodeIdx = this . decodeTables . length ;
106
- var fourthByteNode = this . decodeTables [ fourthByteNodeIdx ] = UNASSIGNED_NODE . slice ( 0 ) ;
107
-
108
- for ( var i = 0x81 ; i <= 0xFE ; i ++ ) {
109
- var secondByteNodeIdx = NODE_START - this . decodeTables [ 0 ] [ i ] ;
110
- var secondByteNode = this . decodeTables [ secondByteNodeIdx ] ;
111
- for ( var j = 0x30 ; j <= 0x39 ; j ++ )
112
- secondByteNode [ j ] = NODE_START - thirdByteNodeIdx ;
113
- }
114
- for ( var i = 0x81 ; i <= 0xFE ; i ++ )
115
- thirdByteNode [ i ] = NODE_START - fourthByteNodeIdx ;
116
- for ( var i = 0x30 ; i <= 0x39 ; i ++ )
117
- fourthByteNode [ i ] = GB18030_CODE
118
- }
119
137
}
120
138
121
139
DBCSCodec . prototype . encoder = DBCSEncoder ;
@@ -124,7 +142,7 @@ DBCSCodec.prototype.decoder = DBCSDecoder;
124
142
// Decoder helpers
125
143
DBCSCodec . prototype . _getDecodeTrieNode = function ( addr ) {
126
144
var bytes = [ ] ;
127
- for ( ; addr > 0 ; addr >>= 8 )
145
+ for ( ; addr > 0 ; addr >>> = 8 )
128
146
bytes . push ( addr & 0xFF ) ;
129
147
if ( bytes . length == 0 )
130
148
bytes . push ( 0 ) ;
@@ -249,19 +267,32 @@ DBCSCodec.prototype._setEncodeSequence = function(seq, dbcsCode) {
249
267
250
268
// Diff view of _fillEncodeTable: lines marked + are the new version, lines marked - are the removed version.
// Purpose (new version): walk the 256 entries of the decode trie node decodeTables[nodeIdx] and register
// the reverse (encode) mapping for each entry, recursing into child nodes.
//   nodeIdx         - index into this.decodeTables of the node to walk.
//   prefix          - numeric value of the bytes leading to this node; the code of entry i is prefix + i.
//   skipEncodeChars - lookup keyed by multibyte code; entries whose code is truthy here are not registered.
// Returns (new version only): true if at least one entry of this node or of a visited sub-node was registered
// through _setEncodeChar or _setEncodeSequence, otherwise false. The removed version returned nothing.
DBCSCodec . prototype . _fillEncodeTable = function ( nodeIdx , prefix , skipEncodeChars ) {
251
269
var node = this . decodeTables [ nodeIdx ] ;
270
// Tracks whether anything was registered below this node; this becomes the return value.
+ var hasValues = false ;
271
// Per-call memo of sub-node indexes already found to contain nothing, so they are not walked again.
+ var subNodeEmpty = { } ;
252
272
for ( var i = 0 ; i < 0x100 ; i ++ ) {
253
273
var uCode = node [ i ] ;
254
274
var mbCode = prefix + i ;
255
275
if ( skipEncodeChars [ mbCode ] )
256
276
continue ;
257
277
258
// Entry dispatch: a non-negative value is registered directly as a single character.
- if ( uCode >= 0 )
278
+ if ( uCode >= 0 ) {
259
279
this . _setEncodeChar ( uCode , mbCode ) ;
260
- else if ( uCode <= NODE_START )
261
- this . _fillEncodeTable ( NODE_START - uCode , mbCode << 8 , skipEncodeChars ) ;
262
- else if ( uCode <= SEQ_START )
280
+ hasValues = true ;
281
// Values at or below NODE_START reference a child node at index NODE_START - uCode.
+ } else if ( uCode <= NODE_START ) {
282
+ var subNodeIdx = NODE_START - uCode ;
283
// NOTE(review): the memo is keyed by sub-node index only, while skipEncodeChars is checked per full code,
// so a sub-node judged empty under one prefix is treated as empty under every later prefix - confirm intended.
+ if ( ! subNodeEmpty [ subNodeIdx ] ) { // Skip empty subtrees (they are too large in gb18030).
284
+ var newPrefix = ( mbCode << 8 ) >>> 0 ; // NOTE: '>>> 0' keeps 32-bit num positive.
285
+ if ( this . _fillEncodeTable ( subNodeIdx , newPrefix , skipEncodeChars ) )
286
+ hasValues = true ;
287
+ else
288
+ subNodeEmpty [ subNodeIdx ] = true ;
289
+ }
290
// Remaining values at or below SEQ_START index this.decodeTableSeq and are registered as sequences.
+ } else if ( uCode <= SEQ_START ) {
263
291
this . _setEncodeSequence ( this . decodeTableSeq [ SEQ_START - uCode ] , mbCode ) ;
292
+ hasValues = true ;
293
+ }
264
294
}
295
+ return hasValues ;
265
296
}
266
297
267
298
@@ -388,10 +419,15 @@ DBCSEncoder.prototype.write = function(str) {
388
419
newBuf [ j ++ ] = dbcsCode >> 8 ; // high byte
389
420
newBuf [ j ++ ] = dbcsCode & 0xFF ; // low byte
390
421
}
391
- else {
422
+ else if ( dbcsCode < 0x1000000 ) {
392
423
newBuf [ j ++ ] = dbcsCode >> 16 ;
393
424
newBuf [ j ++ ] = ( dbcsCode >> 8 ) & 0xFF ;
394
425
newBuf [ j ++ ] = dbcsCode & 0xFF ;
426
+ } else {
427
+ newBuf [ j ++ ] = dbcsCode >>> 24 ;
428
+ newBuf [ j ++ ] = ( dbcsCode >>> 16 ) & 0xFF ;
429
+ newBuf [ j ++ ] = ( dbcsCode >>> 8 ) & 0xFF ;
430
+ newBuf [ j ++ ] = dbcsCode & 0xFF ;
395
431
}
396
432
}
397
433
0 commit comments