Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the usage of lodash (round 2) #145

Merged
merged 19 commits into the base branch from the source branch
Mar 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .eslintrc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"parserOptions": {
"ecmaVersion": 2017
"ecmaVersion": 2019
},
"env": {
"mocha": true,
Expand All @@ -18,6 +18,6 @@
},
"extends": [
"mongodb-js/node",
"mongodb-js/browser",
"mongodb-js/browser"
]
}
3 changes: 2 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
uses: actions/setup-node@v2
with:
node-version: ${{ matrix.node-version }}
- uses: bahmutov/npm-install@v1
- run: npm ci
- run: npm run check
- run: npm test
- run: npm run coverage
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ dist/
# Test output
.nyc_output
# Mac OS files on folder create/delete
.DS_Store
.DS_Store
12 changes: 0 additions & 12 deletions .zuul.yml

This file was deleted.

2 changes: 1 addition & 1 deletion bin/mongodb-schema
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ var bar = new ProgressBar('analyzing [:bar] :percent :etas ', {
clear: true
});

mongodb.connect(uri, function(err, conn) {
mongodb.connect(uri, {useUnifiedTopology: true}, function(err, conn) {
if (err) {
console.error('Failed to connect to MongoDB: ', err);
process.exit(1);
Expand Down
5 changes: 2 additions & 3 deletions lib/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
var stream = require('./stream');
var es = require('event-stream');
var _ = require('lodash');

// var debug = require('debug')('mongodb-schema:wrapper');

Expand All @@ -20,7 +19,7 @@ var _ = require('lodash');
module.exports = function(docs, options, callback) {
const promise = new Promise((resolve, reject) => {
// shift parameters if no options are specified
if (_.isUndefined(options) || (_.isFunction(options) && _.isUndefined(callback))) {
if (typeof options === 'undefined' || (typeof options === 'function' && typeof callback === 'undefined')) {
callback = options;
options = {};
}
Expand All @@ -33,7 +32,7 @@ module.exports = function(docs, options, callback) {
} else if (docs.pipe && typeof docs.pipe === 'function') {
src = docs;
// Arrays
} else if (_.isArray(docs)) {
} else if (Array.isArray(docs)) {
src = es.readArray(docs);
} else {
reject(new Error(
Expand Down
44 changes: 24 additions & 20 deletions lib/stats.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
var _ = require('lodash');
// var debug = require('debug')('mongodb-schema:stats');

var widthRecursive = function(schema) {
Expand All @@ -8,17 +7,21 @@ var widthRecursive = function(schema) {
}
if (schema.fields !== undefined) {
width += schema.fields.length;
width += _.sum(schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');

width += schema.fields.map(field => {
var doc = field.types.find(v => v.name === 'Document');
return widthRecursive(doc);
}));
width += _.sum(schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
}).reduce((p, c) => p + c || 0, 0);


width += schema.fields.map(field => {
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return widthRecursive(doc);
}
}));
})
.reduce((p, c) => p + c || 0, 0);
}
return width;
};
Expand All @@ -30,18 +33,19 @@ var depthRecursive = function(schema) {
var maxChildDepth = 0;
if (schema.fields !== undefined && schema.fields.length > 0) {
maxChildDepth = 1 + Math.max(
_.max(schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');
Math.max(...schema.fields.map(field => {
var doc = field.types.find(v => v.name === 'Document');
return depthRecursive(doc);
})),
_.max(schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
Math.max(...schema.fields.map(field => {
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return depthRecursive(doc);
}
return 0;
})));
}))
);
}
return maxChildDepth;
};
Expand All @@ -55,21 +59,21 @@ var branchingFactors = function(schema) {
if (schema.fields !== undefined && schema.fields.length > 0) {
branchArray.push(schema.fields.length);
res = schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');
var doc = field.types.find(v => v.name === 'Document');
return branchingFactors(doc);
});
branchArray.push.apply(branchArray, _.flatten(res, true));
branchArray.push(...res.flat(Infinity));
res = schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return branchingFactors(doc);
}
return [];
});
branchArray.push.apply(branchArray, _.flatten(res, true));
branchArray.push(...res.flat(Infinity));
}
return _.sortBy(branchArray).reverse();
return branchArray.sort().reverse();
};

module.exports = {
Expand Down
103 changes: 50 additions & 53 deletions lib/stream.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
var es = require('event-stream');
var _ = require('lodash');
var Reservoir = require('reservoir');
var _ = require('lodash');

// var debug = require('debug')('mongodb-schema:stream');

Expand All @@ -13,14 +13,14 @@ var Reservoir = require('reservoir');
*/
var extractStringValueFromBSON = function(value) {
if (value && value._bsontype) {
if (_.includes([ 'Decimal128', 'Long' ], value._bsontype)) {
if (['Decimal128', 'Long'].includes(value._bsontype)) {
return value.toString();
}
if (_.includes([ 'Double', 'Int32' ], value._bsontype)) {
if ([ 'Double', 'Int32' ].includes(value._bsontype)) {
return String(value.value);
}
}
if (_.isString(value)) {
if (typeof value === 'string') {
return value;
}
return String(value);
Expand Down Expand Up @@ -68,7 +68,7 @@ var finalizeSchema = function(schema, parent, tag) {
finalizeSchema(schema.fields, schema, 'fields');
}
if (tag === 'fields') {
_.each(schema, function(field) {
Object.values(schema).forEach((field) => {
// create `Undefined` pseudo-type
var missing = parent.count - field.count;
if (missing > 0) {
Expand All @@ -79,25 +79,27 @@ var finalizeSchema = function(schema, parent, tag) {
count: missing
};
}
field.total_count = _.sum(field.types, 'count');
field.total_count = Object.values(field.types)
.map(v => v.count)
.reduce((p, c) => p + c, 0);

// recursively finalize types
finalizeSchema(field.types, field, 'types');
field.type = _.pluck(field.types, 'name');
field.type = field.types.map(v => v.name);
if (field.type.length === 1) {
field.type = field.type[0];
}
// a field has duplicates when any of its types have duplicates
field.has_duplicates = _.any(field.types, 'has_duplicates');
field.has_duplicates = !!field.types.find(v => v.has_duplicates);
// compute probability
field.probability = field.count / parent.count;
});
// turn object into array
parent.fields = _.values(parent.fields).sort(fieldComparator);
parent.fields = Object.values(parent.fields).sort(fieldComparator);
}
if (tag === 'types') {
_.each(schema, function(type) {
type.total_count = _.sum(type.lengths);
Object.values(schema).forEach(type => {
type.total_count = (type.lengths || []).reduce((p, c) => p + c || 0, 0);
// debug('recursively calling schema.fields');
finalizeSchema(type.fields, type, 'fields');
// debug('recursively calling schema.types');
Expand All @@ -110,7 +112,7 @@ var finalizeSchema = function(schema, parent, tag) {
type.unique = type.count === 0 ? 0 : 1;
type.has_duplicates = type.count > 1;
} else if (type.values) {
type.unique = _.uniq(type.values, false, extractStringValueFromBSON).length;
type.unique = new Set(type.values.map(extractStringValueFromBSON)).size;
type.has_duplicates = type.unique !== type.values.length;
}
// compute `average_length` for array types
Expand All @@ -119,7 +121,7 @@ var finalizeSchema = function(schema, parent, tag) {
}
// recursively finalize fields and types
});
parent.types = _.sortByOrder(_.values(parent.types), 'probability', 'desc');
parent.types = Object.values(parent.types).sort((a, b) => b.probability - a.probability);
}
return schema;
};
Expand All @@ -146,31 +148,25 @@ module.exports = function parse(options) {
/* eslint no-sync: 0 */

// set default options
options = _.defaults({}, options, {
semanticTypes: false,
storeValues: true
});
options = { semanticTypes: false, storeValues: true, ...options};

var semanticTypes = require('./semantic-types');

if (_.isObject(options.semanticTypes)) {
if (typeof options.semanticTypes === 'object') {
// enable existing types that evaluate to true
var enabledTypes = _(options.semanticTypes)
.pick(function(val) {
return _.isBoolean(val) && val;
})
.keys()
.map(function(val) {
return val.toLowerCase();
})
.value();
semanticTypes = _.pick(semanticTypes, function(val, key) {
return _.includes(enabledTypes, key.toLowerCase());
});
// merge with custom types that are functions
semanticTypes = _.assign(semanticTypes,
_.pick(options.semanticTypes, _.isFunction)
);
var enabledTypes = Object.entries(options.semanticTypes)
.filter(([, v]) => typeof v === 'boolean' && v)
.map(([k]) => k.toLowerCase());

semanticTypes = {...
Object.entries(semanticTypes)
.filter(([k]) => enabledTypes.includes(k.toLowerCase()))
.reduce((p, [k, v]) => ({...p, [k]: v}), {}),
};

Object.entries(options.semanticTypes)
.filter(([, v]) => typeof v === 'function')
.forEach(([k, v]) => {semanticTypes[k] = v;});
}

var rootSchema = {
Expand Down Expand Up @@ -205,9 +201,13 @@ module.exports = function parse(options) {

var getSemanticType = function(value, path) {
// pass value to semantic type detectors, return first match or undefined
return _.findKey(semanticTypes, function(fn) {
return fn(value, path);
});

const returnValue = Object.entries(semanticTypes)
.filter(([, v]) => {
return v(value, path);
})
.map(([k]) => k)[0];
return returnValue;
};

/**
Expand Down Expand Up @@ -236,13 +236,13 @@ module.exports = function parse(options) {
* @param {Object} schema the updated schema object
*/


var addToType = function(path, value, schema) {
var bsonType = getBSONType(value);
// if semantic type detection is enabled, the type is the semantic type
// or the original bson type if no semantic type was detected. If disabled,
// it is always the bson type.
var typeName = (options.semanticTypes) ?
getSemanticType(value, path) || bsonType : bsonType;
var typeName = (options.semanticTypes) ? getSemanticType(value, path) || bsonType : bsonType;
var type = schema[typeName] = _.get(schema, typeName, {
name: typeName,
bsonType: bsonType,
Expand All @@ -252,24 +252,22 @@ module.exports = function parse(options) {
type.count++;
// recurse into arrays by calling `addToType` for each element
if (typeName === 'Array') {
type.types = _.get(type, 'types', {});
type.lengths = _.get(type, 'lengths', []);
type.types = type.types || {};
type.lengths = type.lengths || [];
type.lengths.push(value.length);
_.each(value, function(v) {
addToType(path, v, type.types);
});
value.forEach(v => addToType(path, v, type.types));

// recurse into nested documents by calling `addToField` for all sub-fields
} else if (typeName === 'Document') {
type.fields = _.get(type, 'fields', {});
_.forOwn(value, function(v, k) {
addToField(path + '.' + k, v, type.fields);
});
Object.entries(value).forEach(([k, v]) => addToField(path + '.' + k, v, type.fields));

// if the `storeValues` option is enabled, store some example values
} else if (options.storeValues) {
type.values = _.get(type, 'values', bsonType === 'String' ?
new Reservoir(100) : new Reservoir(10000));
var defaultValue = bsonType === 'String' ?
new Reservoir(100) : new Reservoir(10000);
type.values = type.values || defaultValue;

addToValue(type, value);
}
};
Expand All @@ -284,8 +282,9 @@ module.exports = function parse(options) {
addToField = function(path, value, schema) {
var defaults = {};

var pathSplitOnDot = path.split('.');
defaults[path] = {
name: _.last(path.split('.')),
name: pathSplitOnDot[pathSplitOnDot.length - 1],
path: path,
count: 0,
types: {}
Expand All @@ -306,9 +305,7 @@ module.exports = function parse(options) {
}

var parser = es.through(function write(obj) {
_.each(_.keys(obj), function(key) {
addToField(key, obj[key], rootSchema.fields);
});
Object.keys(obj).forEach(key => addToField(key, obj[key], rootSchema.fields));
rootSchema.count += 1;
this.emit('progress', obj);
}, function end() {
Expand Down
Loading