Benchmark: mat4 array caching

Script Preparation code:

/**
 * Multiplies two 4x4 matrices stored as flat arrays (indices 0..15) and
 * writes the product into `out`.
 *
 * For every row i and column j:
 *   out[4*i + j] = b[4*i]*a[j] + b[4*i+1]*a[4+j] + b[4*i+2]*a[8+j] + b[4*i+3]*a[12+j]
 *
 * Only the current row of `b` is copied into locals; the elements of `a` are
 * read straight from the array each time they are used. As a consequence,
 * passing the same array as `out` and `a` makes later rows read values that
 * were already overwritten.
 *
 * @param {number[]} out - receives the 16 result elements
 * @param {number[]} a - first operand, read at indices 0..15
 * @param {number[]} b - second operand, read at indices 0..15
 * @returns {number[]} the `out` array
 */
function multiply(out, a, b) {
  // Row 0 of b
  const b00 = b[0];
  const b01 = b[1];
  const b02 = b[2];
  const b03 = b[3];

  out[0] = b00 * a[0] + b01 * a[4] + b02 * a[8] + b03 * a[12];
  out[1] = b00 * a[1] + b01 * a[5] + b02 * a[9] + b03 * a[13];
  out[2] = b00 * a[2] + b01 * a[6] + b02 * a[10] + b03 * a[14];
  out[3] = b00 * a[3] + b01 * a[7] + b02 * a[11] + b03 * a[15];

  // Row 1 of b
  const b10 = b[4];
  const b11 = b[5];
  const b12 = b[6];
  const b13 = b[7];

  out[4] = b10 * a[0] + b11 * a[4] + b12 * a[8] + b13 * a[12];
  out[5] = b10 * a[1] + b11 * a[5] + b12 * a[9] + b13 * a[13];
  out[6] = b10 * a[2] + b11 * a[6] + b12 * a[10] + b13 * a[14];
  out[7] = b10 * a[3] + b11 * a[7] + b12 * a[11] + b13 * a[15];

  // Row 2 of b
  const b20 = b[8];
  const b21 = b[9];
  const b22 = b[10];
  const b23 = b[11];

  out[8] = b20 * a[0] + b21 * a[4] + b22 * a[8] + b23 * a[12];
  out[9] = b20 * a[1] + b21 * a[5] + b22 * a[9] + b23 * a[13];
  out[10] = b20 * a[2] + b21 * a[6] + b22 * a[10] + b23 * a[14];
  out[11] = b20 * a[3] + b21 * a[7] + b22 * a[11] + b23 * a[15];

  // Row 3 of b
  const b30 = b[12];
  const b31 = b[13];
  const b32 = b[14];
  const b33 = b[15];

  out[12] = b30 * a[0] + b31 * a[4] + b32 * a[8] + b33 * a[12];
  out[13] = b30 * a[1] + b31 * a[5] + b32 * a[9] + b33 * a[13];
  out[14] = b30 * a[2] + b31 * a[6] + b32 * a[10] + b33 * a[14];
  out[15] = b30 * a[3] + b31 * a[7] + b32 * a[11] + b33 * a[15];

  return out;
}

/**
 * Multiplies two 4x4 matrices stored as flat arrays (indices 0..15) and
 * writes the product into `out`.
 *
 * For every row i and column j:
 *   out[4*i + j] = b[4*i]*a[j] + b[4*i+1]*a[4+j] + b[4*i+2]*a[8+j] + b[4*i+3]*a[12+j]
 *
 * All 16 elements of `a` are copied into locals before anything is written,
 * and each row of `b` is copied into locals before the matching row of `out`
 * is written, so `out` may be the same array as `a` or `b`.
 *
 * @param {number[]} out - receives the 16 result elements
 * @param {number[]} a - first operand, read at indices 0..15
 * @param {number[]} b - second operand, read at indices 0..15
 * @returns {number[]} the `out` array
 */
function multiplyCached(out, a, b) {
  // Snapshot the whole first matrix up front.
  const a00 = a[0];
  const a01 = a[1];
  const a02 = a[2];
  const a03 = a[3];
  const a10 = a[4];
  const a11 = a[5];
  const a12 = a[6];
  const a13 = a[7];
  const a20 = a[8];
  const a21 = a[9];
  const a22 = a[10];
  const a23 = a[11];
  const a30 = a[12];
  const a31 = a[13];
  const a32 = a[14];
  const a33 = a[15];

  // The second matrix is snapshotted one row at a time, just before use.
  const b00 = b[0];
  const b01 = b[1];
  const b02 = b[2];
  const b03 = b[3];
  out[0] = b00 * a00 + b01 * a10 + b02 * a20 + b03 * a30;
  out[1] = b00 * a01 + b01 * a11 + b02 * a21 + b03 * a31;
  out[2] = b00 * a02 + b01 * a12 + b02 * a22 + b03 * a32;
  out[3] = b00 * a03 + b01 * a13 + b02 * a23 + b03 * a33;

  const b10 = b[4];
  const b11 = b[5];
  const b12 = b[6];
  const b13 = b[7];
  out[4] = b10 * a00 + b11 * a10 + b12 * a20 + b13 * a30;
  out[5] = b10 * a01 + b11 * a11 + b12 * a21 + b13 * a31;
  out[6] = b10 * a02 + b11 * a12 + b12 * a22 + b13 * a32;
  out[7] = b10 * a03 + b11 * a13 + b12 * a23 + b13 * a33;

  const b20 = b[8];
  const b21 = b[9];
  const b22 = b[10];
  const b23 = b[11];
  out[8] = b20 * a00 + b21 * a10 + b22 * a20 + b23 * a30;
  out[9] = b20 * a01 + b21 * a11 + b22 * a21 + b23 * a31;
  out[10] = b20 * a02 + b21 * a12 + b22 * a22 + b23 * a32;
  out[11] = b20 * a03 + b21 * a13 + b22 * a23 + b23 * a33;

  const b30 = b[12];
  const b31 = b[13];
  const b32 = b[14];
  const b33 = b[15];
  out[12] = b30 * a00 + b31 * a10 + b32 * a20 + b33 * a30;
  out[13] = b30 * a01 + b31 * a11 + b32 * a21 + b33 * a31;
  out[14] = b30 * a02 + b31 * a12 + b32 * a22 + b33 * a32;
  out[15] = b30 * a03 + b31 * a13 + b32 * a23 + b33 * a33;

  return out;
}

​x
 
function multiply(out, a, b) {        let b0 = b[0]        let b1 = b[1]        let b2 = b[2]        let b3 = b[3]                out[0] = b0 * a[0] + b1 * a[4] + b2 * a[8]  + b3 * a[12]        out[1] = b0 * a[1] + b1 * a[5] + b2 * a[9]  + b3 * a[13]        out[2] = b0 * a[2] + b1 * a[6] + b2 * a[10] + b3 * a[14]        out[3] = b0 * a[3] + b1 * a[7] + b2 * a[11] + b3 * a[15]​        b0 = b[4]        b1 = b[5]        b2 = b[6]        b3 = b[7]                out[4] = b0 * a[0] + b1 * a[4] + b2 * a[8]  + b3 * a[12]        out[5] = b0 * a[1] + b1 * a[5] + b2 * a[9]  + b3 * a[13]        out[6] = b0 * a[2] + b1 * a[6] + b2 * a[10] + b3 * a[14]        out[7] = b0 * a[3] + b1 * a[7] + b2 * a[11] + b3 * a[15]​        b0 = b[8]        b1 = b[9]        b2 = b[10]        b3 = b[11]​        out[8]  = b0 * a[0] + b1 * a[4] + b2 * a[8]  + b3 * a[12]        out[9]  = b0 * a[1] + b1 * a[5] + b2 * a[9]  + b3 * a[13]        out[10] = b0 * a[2] + b1 * a[6] + b2 * a[10] + b3 * a[14]        out[11] = b0 * a[3] + b1 * a[7] + b2 * a[11] + b3 * a[15]​        b0 = b[12]        b1 = b[13]        b2 = b[14]        b3 = b[15]​        out[12] = b0 * a[0] + b1 * a[4] + b2 * a[8]  + b3 * a[12]        out[13] = b0 * a[1] + b1 * a[5] + b2 * a[9]  + b3 * a[13]        out[14] = b0 * a[2] + b1 * a[6] + b2 * a[10] + b3 * a[14]        out[15] = b0 * a[3] + b1 * a[7] + b2 * a[11] + b3 * a[15]                return out    }​function multiplyCached(out, a, b) {  let a00 = a[0],    a01 = a[1],    a02 = a[2],    a03 = a[3];  let a10 = a[4],    a11 = a[5],    a12 = a[6],    a13 = a[7];  let a20 = a[8],    a21 = a[9],    a22 = a[10],    a23 = a[11];  let a30 = a[12],    a31 = a[13],    a32 = a[14],    a33 = a[15];​  // Cache only the current line of the second matrix  let b0 = b[0],    b1 = b[1],    b2 = b[2],    b3 = b[3];  out[0] = b0 * a00 + b1 * a10 + b2 * a20 + b3 * a30;  out[1] = b0 * a01 + b1 * a11 + b2 * a21 + b3 * a31;  out[2] = b0 * a02 + b1 * a12 + b2 * a22 + b3 * a32;  out[3] = b0 * a03 + b1 * 
a13 + b2 * a23 + b3 * a33;​  b0 = b[4];  b1 = b[5];  b2 = b[6];  b3 = b[7];  out[4] = b0 * a00 + b1 * a10 + b2 * a20 + b3 * a30;  out[5] = b0 * a01 + b1 * a11 + b2 * a21 + b3 * a31;  out[6] = b0 * a02 + b1 * a12 + b2 * a22 + b3 * a32;  out[7] = b0 * a03 + b1 * a13 + b2 * a23 + b3 * a33;​  b0 = b[8];  b1 = b[9];  b2 = b[10];  b3 = b[11];  out[8] = b0 * a00 + b1 * a10 + b2 * a20 + b3 * a30;  out[9] = b0 * a01 + b1 * a11 + b2 * a21 + b3 * a31;  out[10] = b0 * a02 + b1 * a12 + b2 * a22 + b3 * a32;  out[11] = b0 * a03 + b1 * a13 + b2 * a23 + b3 * a33;​  b0 = b[12];  b1 = b[13];  b2 = b[14];  b3 = b[15];  out[12] = b0 * a00 + b1 * a10 + b2 * a20 + b3 * a30;  out[13] = b0 * a01 + b1 * a11 + b2 * a21 + b3 * a31;  out[14] = b0 * a02 + b1 * a12 + b2 * a22 + b3 * a32;  out[15] = b0 * a03 + b1 * a13 + b2 * a23 + b3 * a33;  return out;}

Tests:

With cache

// multiplyCached reads indices 0..15 of both operands and writes indices
// 0..15 of the output. With 4-element arrays every read past index 3 is
// undefined, all 16 results become NaN, and the output array is grown on each
// run, so the benchmark would not measure a real matrix product.
// Use the same operands in every test case so the runs stay comparable.
const result = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
multiplyCached(
  result,
  [
    123.12, 131.31, 143.12, 0.123,
    131.31, 143.12, 0.123, 123.12,
    143.12, 0.123, 123.12, 131.31,
    0.123, 123.12, 131.31, 143.12,
  ],
  [
    143.12, 0.123, 123.12, 131.31,
    0.123, 123.12, 131.31, 143.12,
    123.12, 131.31, 143.12, 0.123,
    131.31, 143.12, 0.123, 123.12,
  ],
);

 
let result = [0, 0, 0, 0]multiplyCached(result, [123.12, 131.31, 143.12, 0.123], [143.12, 0.123, 123.12, 131.31])

Without cache
let result = [0, 0, 0, 0] multiply(result, [123.12, 131.31, 143.12, 0.123], [143.12, 0.123, 123.12, 131.31])
// multiply reads indices 0..15 of both operands and writes indices 0..15 of
// the output. With 4-element arrays every read past index 3 is undefined, all
// 16 results become NaN, and the output array is grown on each run, so the
// benchmark would not measure a real matrix product.
// Use the same operands in every test case so the runs stay comparable.
const result = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
multiply(
  result,
  [
    123.12, 131.31, 143.12, 0.123,
    131.31, 143.12, 0.123, 123.12,
    143.12, 0.123, 123.12, 131.31,
    0.123, 123.12, 131.31, 143.12,
  ],
  [
    143.12, 0.123, 123.12, 131.31,
    0.123, 123.12, 131.31, 143.12,
    123.12, 131.31, 143.12, 0.123,
    131.31, 143.12, 0.123, 123.12,
  ],
);

Rendered benchmark preparation results:

Suite status: <idle, ready to run>

Previous results

Experimental features:

Memory measurements supported only in Chrome.
For precise memory measurements Chrome must be launched with --enable-precise-memory-info flag.
More information: Monitoring JavaScript Memory

Test case name	Result
With cache
Without cache

Fastest: N/A

Slowest: N/A

Latest run results:

Run details: (Test run date: 3 years ago)

User agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.62

Browser/OS: Chrome 92 on Windows

View result in a separate tab

Test name	Executions per second
With cache	6051233.5 Ops/sec
Without cache	4273649.5 Ops/sec

Autogenerated LLM Summary (model llama3.2:3b, generated 6 months ago):

A challenging benchmarking problem!

After analyzing the provided code, I'll attempt to break down and explain it.

Code Analysis

The code appears to be a JavaScript function multiplyCached (or multiply) that multiplies two arrays: [a00, ..., a33] and [b0, ..., b3]. The multiplication is done using the standard matrix multiplication algorithm. The difference between multiplyCached and multiply lies in the caching mechanism.

In multiplyCached, all 16 elements of the first matrix are first copied into local variables (a00 to a33). The 16 output elements (out[0] to out[15]) are then computed one row at a time: the current row of the second matrix is loaded into b0 to b3, and each output element is the dot product of that row with one column of the first matrix, taken from the cached locals.

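In contrast, multiply caches only the current row of the second matrix (b0 to b3) and reads the elements of the first matrix directly from the array (a[0] to a[15]) every time they are needed. Both functions perform exactly the same multiplications and additions.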

Benchmarking

The benchmarking data suggests that multiplyCached outperforms multiply, with a significant difference in executions per second (6051233.5 vs 4273649.5).

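This outcome is likely due to the caching in multiplyCached: each element of the first matrix is read from the array once and then reused from a local variable, instead of being read from the array four times. No arithmetic is saved; only repeated array element accesses are avoided.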

Conclusion

The provided code demonstrates an optimization technique using caching to improve the performance of matrix multiplication. The benchmarking data supports this claim, showing a substantial speedup for the cached version (multiplyCached) compared to the non-cached version (multiply).

LLMs can make mistakes. Check important info.

A challenging benchmarking problem!

After analyzing the provided code, I'll attempt to break down and explain it.

**Code Analysis**

The code appears to be a JavaScript function `multiplyCached` (or `multiply`) that multiplies two arrays: `[a00, ..., a33]` and `[b0, ..., b3]`. The multiplication is done using the standard matrix multiplication algorithm. The difference between `multiplyCached` and `multiply` lies in the caching mechanism.

In `multiplyCached`, all 16 elements of the first matrix are first copied into local variables (`a00` to `a33`). The 16 output elements (`out[0]` to `out[15]`) are then computed one row at a time: the current row of the second matrix is loaded into `b0` to `b3`, and each output element is the dot product of that row with one column of the first matrix, taken from the cached locals.

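In contrast, `multiply` caches only the current row of the second matrix (`b0` to `b3`) and reads the elements of the first matrix directly from the array (`a[0]` to `a[15]`) every time they are needed. Both functions perform exactly the same multiplications and additions.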

**Benchmarking**

The benchmarking data suggests that `multiplyCached` outperforms `multiply`, with a significant difference in executions per second (6051233.5 vs 4273649.5).

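This outcome is likely due to the caching in `multiplyCached`: each element of the first matrix is read from the array once and then reused from a local variable, instead of being read from the array four times. No arithmetic is saved; only repeated array element accesses are avoided.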

**Conclusion**

The provided code demonstrates an optimization technique using caching to improve the performance of matrix multiplication. The benchmarking data supports this claim, showing a substantial speedup for the cached version (`multiplyCached`) compared to the non-cached version (`multiply`).

Related benchmarks:

mat4 array caching (version: 0)

Comparing performance of: With cache vs Without cache

Created: 3 years ago by: Guest

Jump to the latest result