Ocaml speed : recursive function optimization

I have this recursive function that mimicks a “battle game” :

let rec pli2 jeu1 jeu2 tas1 tas2 nb_plis = 
  match jeu1, jeu2 with
     [],_ | _,[] -> nb_plis 
  |  c1::l1,c2::l2 -> 
      begin
        if (c1 > c2) then pli2 (l1@(c1::c2::tas1)@tas2) l2 [] [] (nb_plis + 1)
        else if (c2 > c1) then pli2  l1 (l2@(c2::c1::tas2)@tas1) [] [] (nb_plis + 1)
        else match l1,l2 with 
           [],_ | _,[] -> nb_plis + 1
        |  c11::l11,c22::l22 -> pli2 l11 l22 (tas1@[c1;c11]) (tas2@[c2;c22]) (nb_plis + 1)
      end

where all the parameters are lists, execpt nb_plis which is an integer.

When I run this function many times, with different and random jeu1 and jeu2, I can measure that on average, it takes 230us (micro seconds) to run it.

I build my project with dune, with :

dune exec Bataille --release

in case it makes any difference. I have an Windows setup.

Having simulated the “Battle game” in C++, Python and Rust, I am a bit disapointed by Ocaml performance (not with recursive functions though). It takes only 30us in C++ and Rust, while in Python it takes more time.

I am no expert in Rust at all (even worst than in Ocaml). What can I do to improve ocaml performance ?

This looks like a data structure problem to me because of the @ symbol concatenating lists together. I’m not 100% sure how the @ symbol is implemented, but it takes at least O(n) time (and doesn’t have the cache benefits of arrays which also take linear time to copy). The main slowdown here is because of the immutable data structure used.

I don’t have any immediate, easy suggestions but someone else might.

1 Like

You are using a lot of List.append, which is not very efficient. This suggests that your data structure is not appropriate (for efficiency). You could for instance use Queue, which is well suited for “pop” and “append” games like this one. I adapted your code with Queue, and it runs 40x faster! (see the pliq function below). However note that Queue is a mutable data structure.

let rec pli2 jeu1 jeu2 tas1 tas2 nb_plis =
  match jeu1, jeu2 with
    [],_ | _,[] -> nb_plis
    |  c1::l1,c2::l2 ->
        begin
          if (c1 > c2) then pli2 (l1@(c1::c2::tas1)@tas2) l2 [] [] (nb_plis + 1)
          else if (c2 > c1) then pli2  l1 (l2@(c2::c1::tas2)@tas1) [] [] (nb_plis + 1)
          else match l1,l2 with
                 [],_ | _,[] -> nb_plis + 1
                 |  c11::l11,c22::l22 -> pli2 l11 l22 (tas1@[c1;c11]) (tas2@[c2;c22]) (nb_plis + 1)
        end

let rnd_jeu n =
  Array.init n (fun _ -> Random.int 55) |> Array.to_list

let pliq jeu1 jeu2 tas1 tas2 =
  let rec loop nb_plis =
    match Queue.take_opt jeu1, Queue.take_opt jeu2 with
    | None, _
      | _, None -> nb_plis
    | Some c1, Some c2 ->
       if c1 > c2 then begin
           Queue.add c1 jeu1;
           Queue.add c2 jeu1;
           Queue.transfer tas1 jeu1;
           Queue.transfer tas2 jeu1
         end else if c1 < c2 then  begin
           Queue.add c2 jeu2;
           Queue.add c1 jeu2;
           Queue.transfer tas2 jeu2;
           Queue.transfer tas1 jeu2
         end else begin
           Queue.add c1 tas1;
           Queue.add c2 tas2;
           Option.iter (fun c -> Queue.add c tas1) (Queue.take_opt jeu1);
           Option.iter (fun c -> Queue.add c tas2) (Queue.take_opt jeu2)
         end;
       loop (nb_plis + 1) in
  loop 0

let queue_of_list l =
  let q = Queue.create () in
  List.iter (fun c -> Queue.add c q) l;
  q

let time_it f =
  let t = Sys.time () in
  f ();
  print_endline (" Time = " ^ (string_of_float (Sys.time () -. t)))

let () =

  let n = 1000 in
  let jeu1 = rnd_jeu n in
  let jeu2 = rnd_jeu n in
  time_it (fun () ->
      pli2 jeu1 jeu2 [] [] 0
      |> print_int);

  let jeu1 = queue_of_list jeu1 in
  let jeu2 = queue_of_list jeu2 in
  time_it (fun () ->
      pliq jeu1 jeu2 (Queue.create ()) (Queue.create ())
      |> print_int)

On my (old) laptop I get

$ dune exec ./bataille.exe
460139 Time = 2.420278           
460139 Time = 0.061268

The first time 2.420278s is for your List implementation, and the second one 0.061268s for the Queue one.
I didn’t test it thoroughly, but the fact that they give the same number of rounds 460139 is a good sign!

4 Likes

Thanks to your advices, I improved the speed to 56us which is not yet as good as rust and C++ but of the same order of magnitude.

3 Likes