I have this piece of code that I already tried to optimize
and loop2 i j =
if
(if !last_time_check > check_time then (
last_time_check := 0;
true)
else (
incr last_time_check;
false))
&& max_time <> infinity
&& Unix.gettimeofday () -. start_time > max_time
then raise Timed_Out;
j >= bound - max (2 * partial) (1 - i)
|| (let diff =
adj_matrix.(path.(i)).(path.(j))
+ adj_matrix.(path.(i + 1)).(path.(bounded (j + 1)))
- adj_matrix.(path.(i)).(path.(i + 1))
- adj_matrix.(path.(j)).(path.(bounded (j + 1)))
in
if diff < 0 then (
invertPath i j path;
if debug then Printf.printf "\ninverted %d and %d, diff : %d" i j diff;
false)
else true)
&& loop2 i (j + 1)
in
I already used the -unsafe flag which made me win some time, but now I don’t know if there is anything else I can do. I’m also using a switch with the lambda optimizer but with no flag appart from -unsafe
The max function appears quite high in your profile. It’s one of these annoying little details: the polymorphic comparison functions (<, = and so on) can be specialised when used on base types like int, and compiled efficiently, but max and min are regular OCaml functions, so they don’t get specialised.
I remember some recent work in the compiler to handle this case better, but I don’t remember if it made it to a release version. For now, you can work around the issue by redefining your own max function with stricter type constraints:
let max (x : int) (y : int) = if x < y then y else x
From the profile you’ve posted, it looks that this could save you almost half of the time.
EDIT: If you’re using OCaml 4.13 or later, you can use Int.max directly.
You could do with less calls to Unix.gettimeofday and precompute some constants like adj_matrix.(path.(i)).(path.(i + 1)), but in reality your code is slow because of the part you didn’t quote:
let rec rec_while i =
(i < max_iter || max_iter < 0)
&& (not (loop1 lower_bound))
&& rec_while (i + 1)
and loop1 i = i >= bound - 3 - partial || (loop2 i (i + 2) && loop1 (i + 1))
These two loops have a huge impact on the complexity O(max_iter * N²) !
Rather than aborting the search as soon as you find a shortcut, and restarting from scratch, you can keep going: if all the optimizations are at the end of the path, this skips re-iterating so much on the start of the path.
But you’ll see even better gain if you invest in some space partitionning for your points: It’s not really useful to check every pair (i,j) since most are too far apart to yield any shortcut!
The argument i of the function loop2 doesn’t change during its recursion, so you could precompute the array accesses that depend on i:
and loop2 i j =
let path_i = path.(i) in
let path_i1 = path.(i + 1) in
let cost_edge_i = adj_matrix.(path_i).(path_i1) in
let rec inner_loop2 j =
... || (... && inner_loop2 (j + 1))
in
inner_loop2 j
It’s unlikely to yield a significant improvement though, you should really focus on performing less “useless” iterations.
Unfortunately not. Currently the choice of which implementation to use for the comparison function is done just after type-checking. The rest of the program then sees whatever code the type-checker choosed to use, and in the case of a generic comparison that’s just a call to a C function, which are opaque to Flambda.
We do have an experimental Flambda 2 branch where we have added the ability to specialise these generic comparison functions after inlining, but it’s not released yet.
Here is the associated perf graph and the new code (I committed the changes so full code will be in the same file as opt_fast function :
let opt_fast ?(debug = false) ?(partial_path = false) ?(max_iter = -1)
?(max_time = infinity) ?(lower_bound = 0) ?(upper_bound = -1)
?(check_time = 1000) adj_matrix path =
(* If [partial_path] is set to true the algorithm won't try to optimize the edge between the end of the path and the beginning.
It's useful if you want to optimize the part of a path *)
let bound =
if upper_bound < 0 then Array.length path
else min upper_bound @@ Array.length path
in
let bounded i = if i >= bound then i - bound else i in
let partial = if partial_path then 1 else 0 in
let last_time_check = ref 0 in
let start_time = if max_time = infinity then 0. else Unix.gettimeofday () in
(* SI max_time = infinity ne pas appeler Unix.gettimeofday () *)
let rec rec_while i =
(i < max_iter || max_iter < 0)
&& (not (loop1 lower_bound true))
&& rec_while (i + 1)
and loop1 i is_opt =
if i >= bound - 3 - partial then is_opt
else loop1 (i + 1) (loop2 i (i + 2) is_opt)
and loop2 i j is_opt =
if
max_time <> infinity
&& (if !last_time_check > check_time then (
last_time_check := 0;
true)
else (
incr last_time_check;
false))
&& Unix.gettimeofday () -. start_time > max_time
then raise Timed_Out;
if j >= bound - max (2 * partial) (1 - i) then is_opt
else
let diff =
adj_matrix.(path.(i)).(path.(j))
+ adj_matrix.(path.(i + 1)).(path.(bounded (j + 1)))
- adj_matrix.(path.(i)).(path.(i + 1))
- adj_matrix.(path.(j)).(path.(bounded (j + 1)))
in
if diff < 0 then (
invertPath i j path;
if debug then Printf.printf "\ninverted %d and %d, diff : %d" i j diff);
loop2 i (j + 1) (is_opt && diff >= 0)
in
try
let is_opt = loop1 lower_bound true in
(if not is_opt then
let _ = rec_while 1 in
());
is_opt
with Timed_Out -> false