I’ve been investigating the idea of indexing arrays with opaque id types rather than generic integers but I’ve been running into performance issues doing so. Here is a self-contained, minimal working example illustrating one particular issue I am facing.
In the following example, all I am doing is define an unsafe_set
function that aliases Array.unsafe_set
. The version of the code using the alias is about 3x slower (compiled with OCaml 4.14 and flambda, using option -O3).
let unsafe_set arr idx v = Array.unsafe_set arr idx v [@@inline]
let normal =
let arr = Array.make 1 0 in
fun () -> Array.unsafe_set arr 0 1
let variant =
let arr = Array.make 1 0 in
fun () -> unsafe_set arr 0 1
let () =
let open Core_bench in
let benchs = [
Bench.Test.create ~name:"Normal" normal;
Bench.Test.create ~name:"Variant" variant] in
benchs |> Bench.make_command |> Command_unix.run
Results:
┌─────────┬──────────┬────────────┐
│ Name │ Time/Run │ Percentage │
├─────────┼──────────┼────────────┤
│ Normal │ 0.89ns │ 29.25% │
│ Variant │ 3.05ns │ 100.00% │
└─────────┴──────────┴────────────┘
It is unclear to me why such a difference is observed since the two snippets above should intuitively produce the same code. Looking at Flambda’s inlining report, unsafe_set
is getting inlined. However, the assembly code produced for the version with aliasing does seem to include an extra bound-check. This check is unexpected since I am using Array.unsafe_set
. In any case, replacing Array.unsafe_set
by Array.set
everywhere makes no difference (same runtimes, 3x gap).
See Flamba's report and the generated assembly code
Flambda inlining report:
* Application of unsafe_set/2{asm_alias.ml:5,2-20}
This function was not specialised because it is not recursive.
This function was inlined because of an annotation.
Normal version assembly:
.L101:
movq 8(%r14), %r15
movq camlDune__exe__Asm_normal__Pccall_36@GOTPCREL(%rip), %rbx
movq %rax, (%rbx)
movq camlDune__exe__Asm_normal__Pccall_36@GOTPCREL(%rip), %rax
movq (%rax), %rax
movq $3, (%rax)
movq camlDune__exe__Asm_normal__Parrayrefs_34@GOTPCREL(%rip), %rax
movq camlDune__exe__Asm_normal__Pccall_36@GOTPCREL(%rip), %rbx
movq (%rbx), %rbx
movq -8(%rbx), %rdi
cmpq $1023, %rdi
Aliased version assembly:
.L103:
movq 8(%r14), %r15
movq camlDune__exe__Asm_alias__Pccall_46@GOTPCREL(%rip), %rbx
movq %rax, (%rbx)
movq camlDune__exe__Asm_alias__Pccall_46@GOTPCREL(%rip), %rax
movq (%rax), %rdi
movzbq -8(%rdi), %rax
cmpq $254, %rax
je .L101
movl $3, %esi
call caml_modify@PLT
jmp .L100
.L101:
movl $3, %eax
movsd (%rax), %xmm0
movsd %xmm0, (%rdi)
.L100:
movq camlDune__exe__Asm_alias__Parrayrefs_44@GOTPCREL(%rip), %rax
movq camlDune__exe__Asm_alias__Pccall_46@GOTPCREL(%rip), %rbx
movq (%rbx), %rbx
movq -8(%rbx), %rdi
cmpq $1023, %rdi
...
.L104:
call caml_ml_array_bound_error@PLT
Does anyone have an insight on what may be happening here?
You can download the full code to replicate here.