<script src="https://cdn.jsdelivr.net/npm/object-hash@2.0.3/dist/object_hash.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/lodash@4.17.4/lodash.min.js"></script>
// SHA-256 hex digest of a string using the Web Crypto API (crypto.subtle)
async function H(m) {
const msgUint8 = new TextEncoder().encode(m);
const hashBuffer = await crypto.subtle.digest("SHA-256", msgUint8);
const hashArray = Array.from(new Uint8Array(hashBuffer));
return hashArray.map((b) => b.toString(16).padStart(2, "0")).join("");
}
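// Example (standard SHA-256 test vector): await H("abc") resolves to
// "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"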
// JSON.stringify with all keys (at every nesting depth) sorted, so that
// logically equal objects always serialize to the same string
function JSONStringifyOrdered(obj) {
const allKeys = new Set();
JSON.stringify(obj, (key, value) => (allKeys.add(key), value));
return JSON.stringify(obj, Array.from(allKeys).sort());
}
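// Key order no longer matters, e.g. (illustrative values):
// JSONStringifyOrdered({ b: 1, a: { d: 2, c: 3 } }) === JSONStringifyOrdered({ a: { c: 3, d: 2 }, b: 1 }) // true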
async function prepare() {
window.NUM_ITEMS = 1000;
window.NUM_TAKE = 20;
window.input = [];
window.data = [];
window.indices = [...Array(window.NUM_ITEMS).keys()];
// Populate data: entries with a nested array of NUM_ITEMS random ids
for (const i of window.indices) {
window.data[i] = {
ids: window.indices
.slice(0, 1000)
.map((x) => Math.floor(Math.random() * window.NUM_ITEMS * 10)),
counter: i,
counter2: Math.floor(Math.random() * i),
};
}
window.check = window.data.map((obj) => false);
window.stringified = window.data.map(JSONStringifyOrdered);
window.strings = new Set(window.stringified);
// calculate hashes for each data object
window.hashes = new Set(window.data.map(objectHash));
window.stringsHashes = new Set(window.stringified.map(objectHash));
window.shas = new Set(await Promise.all(
window.stringified.map(async (item) => await H(item))
));
window.TAKE_IDX = window.indices.reverse().slice(0, window.NUM_TAKE);
// Uncomment to choose random indices
//window.TAKE_IDX = window.TAKE_IDX.map((x) => Math.floor(Math.random() * window.NUM_ITEMS));
// Populate input: copies of the data
for (const i of window.TAKE_IDX) {
window.input[i] = {
ids: [...window.data[i].ids], // shallow copy of the nested array
counter: window.data[i].counter,
counter2: window.data[i].counter2,
};
}
console.log(window.TAKE_IDX);
console.log("END:prepare");
}
console.log("START:prepare");
prepare();
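// The test cases below assume prepare() has finished populating window.data,
// window.input, window.check and window.TAKE_IDX.
// Test case: Iteration + _.isEqual (deep-compare the input against every data object)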
for (const i of window.TAKE_IDX) {
window.check[i] = window.data.some((item) => _.isEqual(item, window.input[i]));
}
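// Test case: Iteration + JSON.stringify (re-serialize the input and compare against every precomputed string)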
for (const i of window.TAKE_IDX) {
window.check[i] = window.stringified.some((item) => item === JSONStringifyOrdered(window.input[i]));
}
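// Test case: Set + objectHash (hash the input, single Set lookup)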
for (const i of window.TAKE_IDX) {
window.check[i] = window.hashes.has(objectHash(window.input[i]));
}
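// Test case: Set + JSON.stringify (ordered stringify of the input, single Set lookup)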
for (const i of window.TAKE_IDX) {
window.check[i] = window.strings.has(JSONStringifyOrdered(window.input[i]));
}
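// Test case: Set + JSON.stringify + native SHA (SHA-256 of the ordered string, single Set lookup)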
async function testNativeHash() {
for (const i of window.TAKE_IDX) {
window.check[i] = window.shas.has(await H(JSONStringifyOrdered(window.input[i])));
}
}
testNativeHash();
| Test name | Executions per second |
|---|---|
| Iteration + _.isEqual | 53.6 Ops/sec |
| Iteration + JSON.stringify | 0.2 Ops/sec |
| Set + objectHash | 39.7 Ops/sec |
| Set + JSON.stringify | 144.0 Ops/sec |
| Set + JSON.stringify + native SHA | 2751.0 Ops/sec |
Let's break down the provided benchmark and explain what is being tested, what is being compared, and the trade-offs of each approach.
Benchmark Overview
The benchmark measures the performance of different approaches for detecting whether a given object already exists in a large array of objects with nested properties. The preparation code builds 1,000 data objects (each with an ids array of random numbers plus two counters), copies 20 of them into window.input, and precomputes key-sorted JSON strings, objectHash values, and SHA-256 digests for the whole dataset. Each test case then checks whether the 20 input objects can be found in the data.
Options Compared
The benchmark consists of five individual test cases:
- Iteration + _.isEqual: iterates over the whole dataset and uses Lodash's isEqual to deep-compare the input object against every data object.
- Iteration + JSON.stringify: serializes the input object with JSONStringifyOrdered (JSON.stringify with sorted keys) and compares the resulting string against every precomputed string in the dataset.
- Set + objectHash: hashes the input object with the objectHash library and checks whether that hash exists in a precomputed Set of hashes.
- Set + JSON.stringify: checks whether the input object's key-sorted JSON string exists in a precomputed Set of strings.
- Set + JSON.stringify + native SHA: hashes the key-sorted JSON string with the browser's native SHA-256 (crypto.subtle.digest) and checks whether the digest exists in a precomputed Set of digests.
Pros and Cons
Here are the main trade-offs of each option:
- Iteration + _.isEqual: needs no preprocessing and handles nested structures correctly, but every lookup is a full O(n) scan with an expensive deep comparison.
- Iteration + JSON.stringify: also O(n) per lookup, and in this implementation the input is re-serialized inside the comparison callback, so the costly stringify runs once per data object rather than once per input.
- Set + objectHash: turns each lookup into an O(1) Set membership test, but objectHash is a pure-JavaScript hash and is relatively heavy per object.
- Set + JSON.stringify: O(1) lookups with only one serialization of the input per check.
- Set + JSON.stringify + native SHA: the same O(1) lookup, with hashing delegated to the browser's native crypto implementation and only short fixed-length digests stored in the Set.
Benchmark Results
The benchmark results show that:
- Set + JSON.stringify + native SHA is reported as the fastest by far, at about 2751 Ops/sec (note that testNativeHash() is asynchronous and is started but not awaited by the test case, so part of its work may fall outside the measured window).
- Set + JSON.stringify follows at about 144 Ops/sec.
- Iteration + _.isEqual (about 53.6 Ops/sec) and Set + objectHash (about 39.7 Ops/sec) are roughly an order of magnitude slower.
- Iteration + JSON.stringify is slowest by a wide margin (about 0.2 Ops/sec), largely because the input is re-serialized for every comparison.
The fastest option combines a deterministic JSON.stringify with fast native hashing. In summary, this benchmark compares different approaches for detecting duplicate data in a large dataset, and the results show that precomputing a Set keyed by a stable serialization or hash (for example, a native SHA-256 digest of the key-sorted JSON) is far more efficient than scanning the array with deep comparisons, especially as the dataset grows.
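For reference, here is a minimal, self-contained sketch of that winning approach in browser JavaScript. The helper names (stableStringify, sha256Hex, buildDigestSet, isKnown) are illustrative and not part of the benchmark code:
// Deterministic stringify: collect every key, sort them, and use the sorted
// list as the replacer so key order never affects the output.
function stableStringify(obj) {
  const keys = new Set();
  JSON.stringify(obj, (key, value) => (keys.add(key), value));
  return JSON.stringify(obj, Array.from(keys).sort());
}
// SHA-256 hex digest of a string using the Web Crypto API.
async function sha256Hex(text) {
  const bytes = new TextEncoder().encode(text);
  const digest = await crypto.subtle.digest("SHA-256", bytes);
  return Array.from(new Uint8Array(digest))
    .map((b) => b.toString(16).padStart(2, "0"))
    .join("");
}
// Precompute one digest per known object (done once, outside the hot path).
async function buildDigestSet(objects) {
  const digests = await Promise.all(
    objects.map((obj) => sha256Hex(stableStringify(obj)))
  );
  return new Set(digests);
}
// Each duplicate check is then one serialization, one native hash,
// and one O(1) Set lookup.
async function isKnown(digestSet, candidate) {
  return digestSet.has(await sha256Hex(stableStringify(candidate)));
}
// Usage (illustrative):
// const known = await buildDigestSet(window.data);
// const duplicate = await isKnown(known, window.input[someIndex]);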