I used the following diff based on your advice:
let extract =
+ let tmp : Mpz.m Mpz.tt = Mpz.init () in
let res : Mpz.m Mpz.tt = Mpz.init () in
fun (num,den,acc) nth ->
(* (nth * num + acc) / den |> to_int *)
- Mpz.mul_si res num nth;
- Mpz.add res res acc;
- Mpz.tdiv_q res res den;
+ Mpz.set tmp acc;
+ Mpz.addmul_ui tmp num nth;
+ Mpz.tdiv_q res tmp den;
Mpz.get_int res
[...]
let prod =
- let tmp : Mpz.m Mpz.tt = Mpz.init () in
fun (res_num, res_den, res_acc) (num,den,acc) d ->
(* (10 * num, den, 10 * (acc - den * of_int d)) *)
Mpz.mul_si res_num num 10;
- Mpz.set res_den den;
+ if res_den != den then Mpz.set res_den den;
- Mpz.mul_si tmp den d;
- Mpz.sub res_acc acc tmp;
+ if res_acc != acc then Mpz.set res_acc acc;
+ Mpz.submul_ui res_acc den d;
Mpz.mul_si res_acc res_acc 10;
()
Without the aliasing optimization (the `if res_foo != foo` checks), the performance of the new version is essentially the same as that of my version on my machine, despite the lower instruction count. With the aliasing optimization, there is a very small improvement in performance, typically 1.35s instead of 1.4s.
On the other hand, a further optimization provided a larger speedup:
let extract =
- let tmp : Mpz.m Mpz.tt = Mpz.init () in
- let res : Mpz.m Mpz.tt = Mpz.init () in
+ let tmp1 : Mpz.m Mpz.tt = Mpz.init () in
+ let tmp2 : Mpz.m Mpz.tt = Mpz.init () in
fun (num,den,acc) nth ->
(* (nth * num + acc) / den |> to_int *)
- Mpz.set tmp acc;
- Mpz.addmul_ui tmp num nth;
- Mpz.tdiv_q res tmp den;
- Mpz.get_int res
+ Mpz.mul_si tmp1 num nth;
+ Mpz.add tmp2 acc tmp1;
+ Mpz.tdiv_q tmp1 tmp2 den;
+ Mpz.get_int tmp1
This change brings the running time down from 1.35s to 1.25s.
Final version:
(* [extract (num, den, acc) nth] evaluates (nth * num + acc) / den with
   [Mpz.tdiv_q] and returns the quotient converted by [Mpz.get_int].

   The two scratch integers below are created once, when [extract] itself
   is defined, and every call overwrites and reuses them instead of
   allocating fresh ones. *)
let extract =
  let scratch : Mpz.m Mpz.tt = Mpz.init () in
  let numerator : Mpz.m Mpz.tt = Mpz.init () in
  fun (num, den, acc) nth ->
    (* scratch <- nth * num *)
    Mpz.mul_si scratch num nth;
    (* numerator <- acc + nth * num *)
    Mpz.add numerator acc scratch;
    (* scratch is free again at this point, so it receives the quotient *)
    Mpz.tdiv_q scratch numerator den;
    Mpz.get_int scratch
(* [prod (res_num, res_den, res_acc) (num, den, acc) d] stores
   (10 * num, den, 10 * (acc - den * d)) into the result triple, in place.

   A result component may be the very same object as the matching input
   component; the physical-equality tests skip the copy in that case.
   NOTE(review): [d] goes to [Mpz.submul_ui], so it is presumably expected
   to be non-negative -- confirm against the caller. *)
let prod (res_num, res_den, res_acc) (num, den, acc) d =
  (* res_num <- 10 * num *)
  Mpz.mul_si res_num num 10;
  (* res_den <- den, unless it already is den *)
  if not (res_den == den) then Mpz.set res_den den;
  (* res_acc <- acc, unless it already is acc *)
  if not (res_acc == acc) then Mpz.set res_acc acc;
  (* res_acc <- res_acc - den * d *)
  Mpz.submul_ui res_acc den d;
  (* res_acc <- 10 * res_acc *)
  Mpz.mul_si res_acc res_acc 10
let cons =
fun (res_num, res_den, res_acc) (num,den,acc) k ->
let k2 = k * 2 + 1 in
(* (k * num, k2 * den, k2 * (acc + num + num)) *)
Mpz.mul_si res_den den k2;
Mpz.add res_acc acc num;
Mpz.add res_acc res_acc num;
Mpz.mul_si res_acc res_acc k2;