I am trying to understand how to use the unboxing and local features from the Jane Street fork to get more performant code. This is from the link here: Jane Street OCaml extensions. When I benchmark this, the first function is much faster than the second. I used the -dcmm flag to dump the result, but the final code looks identical to me, so I’m stumped as to why one is slower than the other. The only difference I can see is that the second function gets inlined into the main expression at the very end, and the first does not. I tried to use the [@inline never]
annotation to prevent the second function from being inlined, but from my inspection of the cmm output this had no effect, so I don’t know whether this annotation is simply not implemented in the new pass or whether there’s a bug.
@dkalinichenko do you have any thoughts?
module M = Stdlib_upstream_compatible.Float_u
(* [pi_approx n] adds up the terms 1/k^2 for k = n, n-1, ..., 1 and returns
   the total as an ordinary [float].  The running total is threaded through
   the tail-recursive helper as an unboxed [float#] and only converted with
   [M.to_float] once the recursion has finished.  For [n <= 0] the result is
   0.  (As n grows the sum approaches pi^2 / 6.) *)
let pi_approx (n : int) : float =
  let rec loop i (sum : float#) : float# =
    if i <= 0 then sum
    else
      let x = M.of_float (float_of_int i) in
      let term = M.div #1. (M.mul x x) in
      loop (i - 1) (M.add sum term)
  in
  M.to_float (loop n #0.)
(* A record with a single mutable field of type [float#] (unboxed float).
   [pi_approx_while] uses a value of this type as its running-sum
   accumulator. *)
type float_ref = { mutable field : float# };;
(* [pi_approx_while n] is the imperative counterpart of [pi_approx]: it adds
   up 1/k^2 for k = n down to 1 with a [while] loop.  The running total lives
   in a [float_ref] record and the counter in an [int ref]; both are bound
   with [local_].  The total is converted to a regular [float] with
   [M.to_float] after the loop.  For [n <= 0] the loop body never runs and
   the result is 0. *)
let pi_approx_while n =
  let local_ total : float_ref = { field = #0.0 } in
  let local_ i = ref n in
  while !i > 0 do
    let x = M.of_float (float_of_int !i) in
    let term = M.div #1. (M.mul x x) in
    total.field <- M.add total.field term;
    i := local_ (!i - 1)
  done;
  M.to_float total.field
;;
(* Benchmark driver: runs each implementation once with 10_000_000 terms,
   measuring wall-clock time with [Unix.gettimeofday], and prints one line
   per implementation in the form "<elapsed seconds> <result>".  The
   recursive version is timed first, then the while-loop version. *)
let () =
  let start = Unix.gettimeofday () in
  let recursive_result = pi_approx 10_000_000 in
  let elapsed = Unix.gettimeofday () -. start in
  Printf.printf "%f %f\n" elapsed recursive_result;
  flush stdout;
  let start = Unix.gettimeofday () in
  let loop_result = pi_approx_while 10_000_000 in
  let elapsed = Unix.gettimeofday () -. start in
  Printf.printf "%f %f\n" elapsed loop_result;
  flush stdout
;;
Output:
0.023381 1.644934
0.062211 1.644934