1
1
var es = require ( 'event-stream' ) ;
2
- var _ = require ( 'lodash' ) ;
3
2
var Reservoir = require ( 'reservoir' ) ;
3
+ var _ = require ( 'lodash' ) ;
4
4
5
5
// var debug = require('debug')('mongodb-schema:stream');
6
6
@@ -13,14 +13,14 @@ var Reservoir = require('reservoir');
13
13
*/
14
14
var extractStringValueFromBSON = function ( value ) {
15
15
if ( value && value . _bsontype ) {
16
- if ( _ . includes ( [ 'Decimal128' , 'Long' ] , value . _bsontype ) ) {
16
+ if ( [ 'Decimal128' , 'Long' ] . includes ( value . _bsontype ) ) {
17
17
return value . toString ( ) ;
18
18
}
19
- if ( _ . includes ( [ 'Double' , 'Int32' ] , value . _bsontype ) ) {
19
+ if ( [ 'Double' , 'Int32' ] . includes ( value . _bsontype ) ) {
20
20
return String ( value . value ) ;
21
21
}
22
22
}
23
- if ( _ . isString ( value ) ) {
23
+ if ( typeof value === 'string' ) {
24
24
return value ;
25
25
}
26
26
return String ( value ) ;
@@ -68,7 +68,7 @@ var finalizeSchema = function(schema, parent, tag) {
68
68
finalizeSchema ( schema . fields , schema , 'fields' ) ;
69
69
}
70
70
if ( tag === 'fields' ) {
71
- _ . each ( schema , function ( field ) {
71
+ Object . values ( schema ) . forEach ( ( field ) => {
72
72
// create `Undefined` pseudo-type
73
73
var missing = parent . count - field . count ;
74
74
if ( missing > 0 ) {
@@ -79,25 +79,27 @@ var finalizeSchema = function(schema, parent, tag) {
79
79
count : missing
80
80
} ;
81
81
}
82
- field . total_count = _ . sum ( field . types , 'count' ) ;
82
+ field . total_count = Object . values ( field . types )
83
+ . map ( v => v . count )
84
+ . reduce ( ( p , c ) => p + c , 0 ) ;
83
85
84
86
// recursively finalize types
85
87
finalizeSchema ( field . types , field , 'types' ) ;
86
- field . type = _ . pluck ( field . types , ' name' ) ;
88
+ field . type = field . types . map ( v => v . name ) ;
87
89
if ( field . type . length === 1 ) {
88
90
field . type = field . type [ 0 ] ;
89
91
}
90
92
// a field has duplicates when any of its types have duplicates
91
- field . has_duplicates = _ . any ( field . types , ' has_duplicates' ) ;
93
+ field . has_duplicates = ! ! field . types . find ( v => v . has_duplicates ) ;
92
94
// compute probability
93
95
field . probability = field . count / parent . count ;
94
96
} ) ;
95
97
// turn object into array
96
- parent . fields = _ . values ( parent . fields ) . sort ( fieldComparator ) ;
98
+ parent . fields = Object . values ( parent . fields ) . sort ( fieldComparator ) ;
97
99
}
98
100
if ( tag === 'types' ) {
99
- _ . each ( schema , function ( type ) {
100
- type . total_count = _ . sum ( type . lengths ) ;
101
+ Object . values ( schema ) . forEach ( type => {
102
+ type . total_count = ( type . lengths || [ ] ) . reduce ( ( p , c ) => p + c || 0 , 0 ) ;
101
103
// debug('recursively calling schema.fields');
102
104
finalizeSchema ( type . fields , type , 'fields' ) ;
103
105
// debug('recursively calling schema.types');
@@ -110,7 +112,7 @@ var finalizeSchema = function(schema, parent, tag) {
110
112
type . unique = type . count === 0 ? 0 : 1 ;
111
113
type . has_duplicates = type . count > 1 ;
112
114
} else if ( type . values ) {
113
- type . unique = _ . uniq ( type . values , false , extractStringValueFromBSON ) . length ;
115
+ type . unique = new Set ( type . values . map ( extractStringValueFromBSON ) ) . size ;
114
116
type . has_duplicates = type . unique !== type . values . length ;
115
117
}
116
118
// compute `average_length` for array types
@@ -119,7 +121,7 @@ var finalizeSchema = function(schema, parent, tag) {
119
121
}
120
122
// recursively finalize fields and types
121
123
} ) ;
122
- parent . types = _ . sortByOrder ( _ . values ( parent . types ) , ' probability' , 'desc' ) ;
124
+ parent . types = Object . values ( parent . types ) . sort ( ( a , b ) => b . probability - a . probability ) ;
123
125
}
124
126
return schema ;
125
127
} ;
@@ -146,31 +148,25 @@ module.exports = function parse(options) {
146
148
/* eslint no-sync: 0 */
147
149
148
150
// set default options
149
- options = _ . defaults ( { } , options , {
150
- semanticTypes : false ,
151
- storeValues : true
152
- } ) ;
151
+ options = { semanticTypes : false , storeValues : true , ...options } ;
153
152
154
153
var semanticTypes = require ( './semantic-types' ) ;
155
154
156
- if ( _ . isObject ( options . semanticTypes ) ) {
155
+ if ( typeof options . semanticTypes === 'object' ) {
157
156
// enable existing types that evaluate to true
158
- var enabledTypes = _ ( options . semanticTypes )
159
- . pick ( function ( val ) {
160
- return _ . isBoolean ( val ) && val ;
161
- } )
162
- . keys ( )
163
- . map ( function ( val ) {
164
- return val . toLowerCase ( ) ;
165
- } )
166
- . value ( ) ;
167
- semanticTypes = _ . pick ( semanticTypes , function ( val , key ) {
168
- return _ . includes ( enabledTypes , key . toLowerCase ( ) ) ;
169
- } ) ;
170
- // merge with custom types that are functions
171
- semanticTypes = _ . assign ( semanticTypes ,
172
- _ . pick ( options . semanticTypes , _ . isFunction )
173
- ) ;
157
+ var enabledTypes = Object . entries ( options . semanticTypes )
158
+ . filter ( ( [ , v ] ) => typeof v === 'boolean' && v )
159
+ . map ( ( [ k ] ) => k . toLowerCase ( ) ) ;
160
+
161
+ semanticTypes = { ...
162
+ Object . entries ( semanticTypes )
163
+ . filter ( ( [ k ] ) => enabledTypes . includes ( k . toLowerCase ( ) ) )
164
+ . reduce ( ( p , [ k , v ] ) => ( { ...p , [ k ] : v } ) , { } ) ,
165
+ } ;
166
+
167
+ Object . entries ( options . semanticTypes )
168
+ . filter ( ( [ , v ] ) => typeof v === 'function' )
169
+ . forEach ( ( [ k , v ] ) => { semanticTypes [ k ] = v ; } ) ;
174
170
}
175
171
176
172
var rootSchema = {
@@ -205,9 +201,13 @@ module.exports = function parse(options) {
205
201
206
202
/**
 * Runs the enabled semantic type detectors against a value and reports
 * the name of the first one that accepts it.
 *
 * Detectors are tried in the enumeration order of the `semanticTypes`
 * object, and evaluation stops at the first truthy result, so later
 * detectors are not invoked once a match has been found.
 *
 * @param {Any} value   the value to classify
 * @param {String} path the field path the value was found at
 * @return {String|undefined} key of the first matching detector in
 *   `semanticTypes`, or `undefined` when no detector matches
 */
var getSemanticType = function(value, path) {
  // `find` short-circuits on the first match; `filter(...)[0]` would call
  // every remaining detector for every value even after one had matched.
  const firstMatch = Object.entries(semanticTypes).find(function(entry) {
    const detector = entry[1];
    return detector(value, path);
  });
  return firstMatch ? firstMatch[0] : undefined;
};
212
212
213
213
/**
@@ -236,13 +236,13 @@ module.exports = function parse(options) {
236
236
* @param {Object } schema the updated schema object
237
237
*/
238
238
239
+
239
240
var addToType = function ( path , value , schema ) {
240
241
var bsonType = getBSONType ( value ) ;
241
242
// if semantic type detection is enabled, the type is the semantic type
242
243
// or the original bson type if no semantic type was detected. If disabled,
243
244
// it is always the bson type.
244
- var typeName = ( options . semanticTypes ) ?
245
- getSemanticType ( value , path ) || bsonType : bsonType ;
245
+ var typeName = ( options . semanticTypes ) ? getSemanticType ( value , path ) || bsonType : bsonType ;
246
246
var type = schema [ typeName ] = _ . get ( schema , typeName , {
247
247
name : typeName ,
248
248
bsonType : bsonType ,
@@ -252,24 +252,22 @@ module.exports = function parse(options) {
252
252
type . count ++ ;
253
253
// recurse into arrays by calling `addToType` for each element
254
254
if ( typeName === 'Array' ) {
255
- type . types = _ . get ( type , ' types' , { } ) ;
256
- type . lengths = _ . get ( type , ' lengths' , [ ] ) ;
255
+ type . types = type . types || { } ;
256
+ type . lengths = type . lengths || [ ] ;
257
257
type . lengths . push ( value . length ) ;
258
- _ . each ( value , function ( v ) {
259
- addToType ( path , v , type . types ) ;
260
- } ) ;
258
+ value . forEach ( v => addToType ( path , v , type . types ) ) ;
261
259
262
260
// recurse into nested documents by calling `addToField` for all sub-fields
263
261
} else if ( typeName === 'Document' ) {
264
262
type . fields = _ . get ( type , 'fields' , { } ) ;
265
- _ . forOwn ( value , function ( v , k ) {
266
- addToField ( path + '.' + k , v , type . fields ) ;
267
- } ) ;
263
+ Object . entries ( value ) . forEach ( ( [ k , v ] ) => addToField ( path + '.' + k , v , type . fields ) ) ;
268
264
269
265
// if the `storeValues` option is enabled, store some example values
270
266
} else if ( options . storeValues ) {
271
- type . values = _ . get ( type , 'values' , bsonType === 'String' ?
272
- new Reservoir ( 100 ) : new Reservoir ( 10000 ) ) ;
267
+ var defaultValue = bsonType === 'String' ?
268
+ new Reservoir ( 100 ) : new Reservoir ( 10000 ) ;
269
+ type . values = type . values || defaultValue ;
270
+
273
271
addToValue ( type , value ) ;
274
272
}
275
273
} ;
@@ -284,8 +282,9 @@ module.exports = function parse(options) {
284
282
addToField = function ( path , value , schema ) {
285
283
var defaults = { } ;
286
284
285
+ var pathSplitOnDot = path . split ( '.' ) ;
287
286
defaults [ path ] = {
288
- name : _ . last ( path . split ( '.' ) ) ,
287
+ name : pathSplitOnDot [ pathSplitOnDot . length - 1 ] ,
289
288
path : path ,
290
289
count : 0 ,
291
290
types : { }
@@ -306,9 +305,7 @@ module.exports = function parse(options) {
306
305
}
307
306
308
307
var parser = es . through ( function write ( obj ) {
309
- _ . each ( _ . keys ( obj ) , function ( key ) {
310
- addToField ( key , obj [ key ] , rootSchema . fields ) ;
311
- } ) ;
308
+ Object . keys ( obj ) . forEach ( key => addToField ( key , obj [ key ] , rootSchema . fields ) ) ;
312
309
rootSchema . count += 1 ;
313
310
this . emit ( 'progress' , obj ) ;
314
311
} , function end ( ) {
0 commit comments