<script src="https://cdn.jsdelivr.net/npm/object-hash@2.0.3/dist/object_hash.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/lodash@4.17.4/lodash.min.js"></script>
// Benchmark configuration: size of the data set and how many entries get looked up.
window.NUM_ITEMS = 1000;
window.NUM_TAKE = 20;
// input: copies to look up; data: the reference data set; check: per-index lookup result.
window.input = [];
window.data = [];
window.check = [];
// Spread the keys() iterator so indices holds the numbers 0..NUM_ITEMS-1.
// Without the spread the array would contain a single iterator object and
// the loop below would run exactly once with that object as its "index".
window.indices = [...Array(window.NUM_ITEMS).keys()];
// Populate data: entries with a nested array of 50 random entries
for (const i of window.indices) {
  window.data[i] = {
    // slice(0, 50) is only used to get (up to) 50 slots; the values themselves are random.
    ids: window.indices.slice(0, 50).map(() => Math.floor(Math.random() * window.NUM_ITEMS * 10)),
    counter: i,
    counter2: Math.floor(Math.random() * i),
  };
  window.check[i] = false;
}
// calculate hashes for each data object
window.hashes = new Set(window.data.map((obj) => objectHash(obj)));
// Call JSON.stringify through an arrow so map's (index, array) arguments are
// not passed on as the replacer/space parameters.
window.strings = new Set(window.data.map((obj) => JSON.stringify(obj)));
// Pick the last NUM_TAKE indices, highest first. reverse() works in place,
// so reverse a copy to leave window.indices in its original order.
window.TAKE_IDX = [...window.indices].reverse().slice(0, window.NUM_TAKE);
// Uncomment to choose random indices
//window.TAKE_IDX = window.TAKE_IDX.map((x) => Math.floor(Math.random() * window.NUM_ITEMS));
// Populate input: copies of the data
for (const i of window.TAKE_IDX) {
  const { ids, counter, counter2 } = window.data[i];
  // Spread ids into a fresh array so the copy is structurally equal to the
  // source entry but is a distinct object; wrapping it as [ids] would nest the
  // array and no lookup could ever match. Key order is kept the same as in
  // the data entries because the JSON.stringify lookup is order-sensitive.
  window.input[i] = { ids: [...ids], counter, counter2 };
}
// Test case "_.isEqual": for every selected index, scan window.data and
// deep-compare each item against the input copy with lodash's _.isEqual.
// some() stops at the first match; the result is stored in window.check[i].
for (const i of window.TAKE_IDX) {
window.check[i] = window.data.some((item) => _.isEqual(item, window.input[i]));
}
// Test case "JSON.stringify": serialise each input copy and look it up in the
// precomputed window.strings set. A match requires the serialised text to be
// identical, so it depends on the input having the same key order as the data.
for (const i of window.TAKE_IDX) {
window.check[i] = window.strings.has(JSON.stringify(window.input[i]));
}
// Test case "objectHash": hash each input copy with objectHash and look the
// hash up in the precomputed window.hashes set; result goes to window.check[i].
for (const i of window.TAKE_IDX) {
window.check[i] = window.hashes.has(objectHash(window.input[i]));
}
--enable-precise-memory-info
flag.
Test case name | Result |
---|---|
_.isEqual | |
JSON.stringify | |
objectHash | |
Test name | Executions per second |
---|---|
_.isEqual | 47.6 Ops/sec |
JSON.stringify | 24354.5 Ops/sec |
objectHash | 257.5 Ops/sec |
I'll break down the provided benchmark definition and explain what's being tested, compared, and their pros/cons.
Benchmark Overview
The benchmark compares three approaches to detect duplicate data in an array of objects with nested properties:
_.isEqual (Lodash)
JSON.stringify
objectHash
The test case uses a large dataset (window.NUM_ITEMS = 1000) and generates random data with nested arrays.
Approaches Comparison
Here's a brief overview of each approach:
Other Considerations
Alternatives
If you're interested in exploring alternative approaches, here are a few options:
crypto module in Node.js or hash-object library.
Keep in mind that the best approach depends on your specific use case and requirements.