[help]Improving the performance of this code

I have a version that stores the Chrome.t fields (x and fitness) directly in a bigger array. It does improve performance a lot, but makes it impossible to use regular array functions on the structure (I had to copy the implementation of Array.stable_sort and tweak it slightly to make everything work). I’ll try to clean it up a bit and will submit it.

Here is the code:

module Multi_float_array (P : sig
    type elt
    val size : int
    val get : Float.Array.t -> int -> elt
    val set : Float.Array.t -> int -> elt -> unit
    val compare : elt -> elt -> int
  end) = struct
  type t = Float.Array.t

  let make len = Float.Array.make (len * P.size) 0.

  let length a = (Float.Array.length a) / P.size

  let[@inline] get a i = (P.get[@inlined hint]) a (i * P.size)

  let[@inline] set a i v = (P.set[@inlined hint]) a (i * P.size) v

  let blit src src_ofs dst dst_ofs len =
    Float.Array.blit src (src_ofs * P.size) dst (dst_ofs * P.size) (len * P.size)

  let stable_sort a =
    let cutoff = 5 in
    let cmp = P.compare in
    let merge src1ofs src1len src2 src2ofs src2len dst dstofs =
      let src1r = src1ofs + src1len and src2r = src2ofs + src2len in
      let rec loop i1 s1 i2 s2 d =
        if cmp s1 s2 <= 0 then begin
          set dst d s1;
          let i1 = i1 + 1 in
          if i1 < src1r then
            loop i1 (get a i1) i2 s2 (d + 1)
          else
            blit src2 i2 dst (d + 1) (src2r - i2)
        end else begin
          set dst d s2;
          let i2 = i2 + 1 in
          if i2 < src2r then
            loop i1 s1 i2 (get src2 i2) (d + 1)
          else
            blit a i1 dst (d + 1) (src1r - i1)
        end
      in loop src1ofs (get a src1ofs) src2ofs (get src2 src2ofs) dstofs;
    in
    let isortto srcofs dst dstofs len =
      for i = 0 to len - 1 do
        let e = (get a (srcofs + i)) in
        let j = ref (dstofs + i - 1) in
        while (!j >= dstofs && cmp (get dst !j) e > 0) do
          set dst (!j + 1) (get dst !j);
          decr j;
        done;
        set dst (!j + 1) e;
      done;
    in
    let rec sortto srcofs dst dstofs len =
      if len <= cutoff then isortto srcofs dst dstofs len else begin
        let l1 = len / 2 in
        let l2 = len - l1 in
        sortto (srcofs + l1) dst (dstofs + l1) l2;
        sortto srcofs a (srcofs + l2) l1;
        merge (srcofs + l2) l1 dst (dstofs + l1) l2 dst dstofs;
      end;
    in
    let l = length a in
    if l <= cutoff then isortto 0 a 0 l else begin
      let l1 = l / 2 in
      let l2 = l - l1 in
      let t = make l2 in
      sortto l1 t 0 l2;
      sortto 0 a l2 l1;
      merge l2 l1 t 0 l2 a 0;
    end
end[@@inline]

let _MIN_X = 0.
let _MAX_X = 10.

module Chrome = struct
  type t = { x : float; fitness : float }

  type elt = t

  let compare {x = _; fitness = f1 } { x = _; fitness = f2 } =
    Float.compare f1 f2

  let size = 2

  let get base pos =
    { x = Float.Array.get base pos;
      fitness = Float.Array.get base (pos + 1)
    }

  let set base pos t =
    Float.Array.set base pos t.x;
    Float.Array.set base (pos + 1) t.fitness

  let init ?(min = _MIN_X) ?(max = _MAX_X) () =
    let x = Random.float (max -. min) +. min in
    let fitness = x *. x *. x in
    { x; fitness }

  let mutate () = init ()

  let cross c1 c2 =
    let median = (c1.x +. c2.x) /. 2. in
    (init ~max:median (), init ~min:median ())
end

module Chrome_array = Multi_float_array(Chrome)

module Gen = struct
  type t = {
    chromes : Chrome_array.t;
    genSize : int;
    genCount : int;
    mutable bestChrome : Chrome.t option;
    mutRatio : float;
    crossRatio : float;
  }

  let init ~genSize ~genCount ~mutRatio ~crossRatio =
    let chromes = Chrome_array.make (genSize * Chrome.size) in
    for pos = 0 to genSize - 1 do
      Chrome_array.set chromes pos (Chrome.init ())
    done;
    {
      chromes;
      genSize;
      genCount;
      bestChrome = None;
      mutRatio;
      crossRatio;
    }

  let mutate g =
    for i = 0 to g.genSize - 1 do
      let ratio = Random.float 1. in
      if ratio <= g.mutRatio then
        let c = Chrome.mutate () in
        Chrome_array.set g.chromes i c
    done

  let cross g =
    for _i = 0 to g.genSize - 1 do
      let ratio = Random.float 1. in
      if ratio <= g.crossRatio then begin
        let i1, i2 =
          ( Random.int g.genSize,
            Random.int g.genSize )
        in
        let c1, c2 =
          Chrome.cross
            (Chrome_array.get g.chromes i1)
            (Chrome_array.get g.chromes i2)
        in
        Chrome_array.set g.chromes i1 c1;
        Chrome_array.set g.chromes i2 c2
      end
    done


  let updateGen g =
    Chrome_array.stable_sort g.chromes;
    g.bestChrome <- Some (Chrome_array.get g.chromes 0)

  let start g =
    for _i = 0 to g.genCount - 1 do
      let () = g |> mutate in
      let () = g |> cross in
      g |> updateGen
    done
end

let start () =
  let gen =
    Gen.init ~genSize:10000 ~genCount:1000 ~mutRatio:0.15 ~crossRatio:0.65
  in
  let () = gen |> Gen.start in
  match gen.bestChrome with
  | None -> ()
  | Some c ->
      print_endline
        ("best value is "
        ^ (c.fitness |> string_of_float)
        ^ " and best x is " ^ (c.x |> string_of_float))

let main () =
  let testCount = 10 in
  let timeStart = Sys.time () in
  let () =
    for _i = 0 to testCount - 1 do
      start ()
    done
  in
  let timeEnd = Sys.time () in
  print_endline
    ("mean time is "
    ^ (let timeUse =
         (timeEnd -. timeStart) *. 1000. /. (testCount |> float_of_int)
       in
       timeUse |> Float.round |> int_of_float |> string_of_int)
    ^ "ms")

let () = main ()

The mean time on a sample run on my computer went from ~4s to ~2.5s.

3 Likes

Thanks for the reply, this optimization seems not worked in my computer, i just replaced Array.sort with Array.stable_sort of my old version of OCaml code, the mean time changed from 2700ms to 2260ms, and i also test your version, the mean time is 2403ms. You can see the code here:
build.sh

ocamlopt -o gen.exe -O3 -unsafe gen.ml
ocamlopt -o fast_gen.exe -O3 -unsafe fast_gen.ml

gen.ml

let _MIN_X = 0.
let _MAX_X = 10.

(* [min, max] *)
let randInt ~min ~max = Random.int (max - min) + min

module Chrome = struct
  type t = { mutable x : float; mutable fitness : float }

  let init () = { x = Random.float (_MAX_X -. _MIN_X) +. _MIN_X; fitness = 0. }
  let calculate c = c.fitness <- c.x *. c.x *. c.x

  let mutate () =
    let c = init () in
    let () = calculate c in
    c

  let cross c1 c2 =
    let median = (c1.x +. c2.x) *. 0.5 in
    let newC1 = { x = 0.; fitness = 0. } in
    let newC2 = { x = 0.; fitness = 0. } in
    let () = newC1.x <- Random.float (median -. _MIN_X) +. _MIN_X in
    let () = calculate newC1 in
    let () = newC2.x <- Random.float (_MAX_X -. median) +. median in
    let () = calculate newC2 in
    (newC1, newC2)
end

module Gen = struct
  type t = {
    mutable chromes : Chrome.t array;
    genSize : int;
    genCount : int;
    mutable bestChrome : Chrome.t option;
    mutRatio : float;
    crossRatio : float;
  }

  let init g =
    g.chromes <-
      Array.init g.genSize (fun _i ->
          let c = Chrome.init () in
          let () = Chrome.calculate c in
          c)

  let mutate g =
    for i = 0 to g.genSize - 1 do
      let ratio = Random.float 1. in
      if Float.compare ratio g.mutRatio != 1 then
        let c = Chrome.mutate () in
        Array.set g.chromes i c
    done

  let cross g =
    for _i = 0 to g.genSize - 1 do
      let ratio = Random.float 1. in
      if Float.compare ratio g.crossRatio != 1 then
        let i1, i2 =
          ( randInt ~min:0 ~max:(g.genSize - 1),
            randInt ~min:0 ~max:(g.genSize - 1) )
        in
        let c1, c2 = Chrome.cross g.chromes.(i1) g.chromes.(i2) in
        let () = g.chromes.(i1) <- c1 in
        g.chromes.(i2) <- c2
    done

  let updateGen g =
    let () =
      Array.stable_sort
        (fun (c1 : Chrome.t) (c2 : Chrome.t) ->
          Float.compare c1.fitness c2.fitness)
        g.chromes
    in
    g.bestChrome <- Some g.chromes.(0)

  let start g =
    for _i = 0 to g.genCount - 1 do
      let () = mutate g in
      let () = cross g in
      g |> updateGen
    done
end

let start () =
  let gen : Gen.t =
    {
      chromes = ([||] : Chrome.t array);
      genSize = 10000;
      genCount = 1000;
      bestChrome = None;
      mutRatio = 0.15;
      crossRatio = 0.65;
    }
  in
  let () = gen |> Gen.init in
  let () = gen |> Gen.start in
  match gen.bestChrome with
  | None -> ()
  | Some c ->
      Stdlib.prerr_endline
        ("best value is "
        ^ (c.fitness |> Float.to_string)
        ^ " and best x is " ^ (c.x |> Float.to_string))

let main () =
  let testCount = 10 in
  let timeStart = Sys.time () in
  let () =
    for _i = 0 to testCount - 1 do
      start ()
    done
  in
  let timeEnd = Sys.time () in
  Stdlib.print_endline
    ("mean time is "
    ^ (let timeUse =
         (timeEnd -. timeStart) *. 1000. /. (testCount |> Float.of_int)
       in
       timeUse |> Float.round |> Int.of_float |> Int.to_string)
    ^ "ms")

let () = main ()

fast_gen.ml(your code)

module Multi_float_array (P : sig
  type elt

  val size : int
  val get : Float.Array.t -> int -> elt
  val set : Float.Array.t -> int -> elt -> unit
  val compare : elt -> elt -> int
end) =
struct
  type t = Float.Array.t

  let make len = Float.Array.make (len * P.size) 0.
  let length a = Float.Array.length a / P.size
  let[@inline] get a i = (P.get [@inlined hint]) a (i * P.size)
  let[@inline] set a i v = (P.set [@inlined hint]) a (i * P.size) v

  let blit src src_ofs dst dst_ofs len =
    Float.Array.blit src (src_ofs * P.size) dst (dst_ofs * P.size) (len * P.size)

  let stable_sort a =
    let cutoff = 5 in
    let cmp = P.compare in
    let merge src1ofs src1len src2 src2ofs src2len dst dstofs =
      let src1r = src1ofs + src1len and src2r = src2ofs + src2len in
      let rec loop i1 s1 i2 s2 d =
        if cmp s1 s2 <= 0 then (
          set dst d s1;
          let i1 = i1 + 1 in
          if i1 < src1r then loop i1 (get a i1) i2 s2 (d + 1)
          else blit src2 i2 dst (d + 1) (src2r - i2))
        else (
          set dst d s2;
          let i2 = i2 + 1 in
          if i2 < src2r then loop i1 s1 i2 (get src2 i2) (d + 1)
          else blit a i1 dst (d + 1) (src1r - i1))
      in
      loop src1ofs (get a src1ofs) src2ofs (get src2 src2ofs) dstofs
    in
    let isortto srcofs dst dstofs len =
      for i = 0 to len - 1 do
        let e = get a (srcofs + i) in
        let j = ref (dstofs + i - 1) in
        while !j >= dstofs && cmp (get dst !j) e > 0 do
          set dst (!j + 1) (get dst !j);
          decr j
        done;
        set dst (!j + 1) e
      done
    in
    let rec sortto srcofs dst dstofs len =
      if len <= cutoff then isortto srcofs dst dstofs len
      else
        let l1 = len / 2 in
        let l2 = len - l1 in
        sortto (srcofs + l1) dst (dstofs + l1) l2;
        sortto srcofs a (srcofs + l2) l1;
        merge (srcofs + l2) l1 dst (dstofs + l1) l2 dst dstofs
    in
    let l = length a in
    if l <= cutoff then isortto 0 a 0 l
    else
      let l1 = l / 2 in
      let l2 = l - l1 in
      let t = make l2 in
      sortto l1 t 0 l2;
      sortto 0 a l2 l1;
      merge l2 l1 t 0 l2 a 0
end
[@@inline]

let _MIN_X = 0.
let _MAX_X = 10.

module Chrome = struct
  type t = { x : float; fitness : float }
  type elt = t

  let compare { x = _; fitness = f1 } { x = _; fitness = f2 } =
    Float.compare f1 f2

  let size = 2

  let get base pos =
    { x = Float.Array.get base pos; fitness = Float.Array.get base (pos + 1) }

  let set base pos t =
    Float.Array.set base pos t.x;
    Float.Array.set base (pos + 1) t.fitness

  let init ?(min = _MIN_X) ?(max = _MAX_X) () =
    let x = Random.float (max -. min) +. min in
    let fitness = x *. x *. x in
    { x; fitness }

  let mutate () = init ()

  let cross c1 c2 =
    let median = (c1.x +. c2.x) /. 2. in
    (init ~max:median (), init ~min:median ())
end

module Chrome_array = Multi_float_array (Chrome)

module Gen = struct
  type t = {
    chromes : Chrome_array.t;
    genSize : int;
    genCount : int;
    mutable bestChrome : Chrome.t option;
    mutRatio : float;
    crossRatio : float;
  }

  let init ~genSize ~genCount ~mutRatio ~crossRatio =
    let chromes = Chrome_array.make (genSize * Chrome.size) in
    for pos = 0 to genSize - 1 do
      Chrome_array.set chromes pos (Chrome.init ())
    done;
    { chromes; genSize; genCount; bestChrome = None; mutRatio; crossRatio }

  let mutate g =
    for i = 0 to g.genSize - 1 do
      let ratio = Random.float 1. in
      if ratio <= g.mutRatio then
        let c = Chrome.mutate () in
        Chrome_array.set g.chromes i c
    done

  let cross g =
    for _i = 0 to g.genSize - 1 do
      let ratio = Random.float 1. in
      if ratio <= g.crossRatio then (
        let i1, i2 = (Random.int g.genSize, Random.int g.genSize) in
        let c1, c2 =
          Chrome.cross
            (Chrome_array.get g.chromes i1)
            (Chrome_array.get g.chromes i2)
        in
        Chrome_array.set g.chromes i1 c1;
        Chrome_array.set g.chromes i2 c2)
    done

  let updateGen g =
    Chrome_array.stable_sort g.chromes;
    g.bestChrome <- Some (Chrome_array.get g.chromes 0)

  let start g =
    for _i = 0 to g.genCount - 1 do
      let () = g |> mutate in
      let () = g |> cross in
      g |> updateGen
    done
end

let start () =
  let gen =
    Gen.init ~genSize:10000 ~genCount:1000 ~mutRatio:0.15 ~crossRatio:0.65
  in
  let () = gen |> Gen.start in
  match gen.bestChrome with
  | None -> ()
  | Some c ->
      print_endline
        ("best value is "
        ^ (c.fitness |> string_of_float)
        ^ " and best x is " ^ (c.x |> string_of_float))

let main () =
  let testCount = 10 in
  let timeStart = Sys.time () in
  let () =
    for _i = 0 to testCount - 1 do
      start ()
    done
  in
  let timeEnd = Sys.time () in
  print_endline
    ("mean time is "
    ^ (let timeUse =
         (timeEnd -. timeStart) *. 1000. /. (testCount |> float_of_int)
       in
       timeUse |> Float.round |> int_of_float |> string_of_int)
    ^ "ms")

let () = main () 

result of gen.ml

best value is 5.2671890462e-12 and best x is 0.000173990806823
best value is 2.39369249382e-17 and best x is 2.88196997976e-06
best value is 3.50392541523e-10 and best x is 0.00070499323738
best value is 1.04521736742e-09 and best x is 0.00101485081727
best value is 7.1120663069e-11 and best x is 0.000414316216726
best value is 6.28148092479e-12 and best x is 0.000184510328369
best value is 6.24688116892e-10 and best x is 0.000854845732979
best value is 7.74849079166e-10 and best x is 0.000918485646354
best value is 1.21257054376e-08 and best x is 0.00229739499884
best value is 1.97807675276e-10 and best x is 0.000582658893358
mean time is 2262ms

result of fast_gen.ml

best value is 1.15352994262e-08 and best x is 0.00225948584032
best value is 1.54486195232e-09 and best x is 0.00115601435158
best value is 2.48326882897e-11 and best x is 0.000291748016684
best value is 9.96224117638e-11 and best x is 0.000463573943407
best value is 1.16131699968e-11 and best x is 0.000226455876198
best value is 4.33358971316e-11 and best x is 0.000351249674583
best value is 1.80064614901e-07 and best x is 0.00564689170301
best value is 1.4520886812e-10 and best x is 0.000525610920713
best value is 1.89132959597e-10 and best x is 0.00057401389607
best value is 1.48900378495e-09 and best x is 0.00114191015171
mean time is 2403ms

Are you compiling this with flambda as initially suggested by @lukstafi?

opam switch create 4.14.1+flambda ocaml.4.14.1 ocaml-option-flambda

I took your initial version and replaced randInt calls with Random.int 1000 as @vlaviron suggested. ocamlopt -o gen.exe -O2 gen.ml under flambda then gave me significant speed-ups (I’m on a slower machine than you though)

2 Likes

I changed into 4.14.1+flambda, it is indeed faster than my old version and the Float.Array version, here is the result:
gen.ml

best value is 5.2671890462e-12 and best x is 0.000173990806823
best value is 2.39369249382e-17 and best x is 2.88196997976e-06
best value is 3.50392541523e-10 and best x is 0.00070499323738
best value is 1.04521736742e-09 and best x is 0.00101485081727
best value is 7.1120663069e-11 and best x is 0.000414316216726
best value is 6.28148092479e-12 and best x is 0.000184510328369
best value is 6.24688116892e-10 and best x is 0.000854845732979
best value is 7.74849079166e-10 and best x is 0.000918485646354
best value is 1.21257054376e-08 and best x is 0.00229739499884
best value is 1.97807675276e-10 and best x is 0.000582658893358
mean time is 1960ms

fast_gen.ml

best value is 1.15352994262e-08 and best x is 0.00225948584032
best value is 1.54486195232e-09 and best x is 0.00115601435158
best value is 2.48326882897e-11 and best x is 0.000291748016684
best value is 9.96224117638e-11 and best x is 0.000463573943407
best value is 1.16131699968e-11 and best x is 0.000226455876198
best value is 4.33358971316e-11 and best x is 0.000351249674583
best value is 1.80064614901e-07 and best x is 0.00564689170301
best value is 1.4520886812e-10 and best x is 0.000525610920713
best value is 1.89132959597e-10 and best x is 0.00057401389607
best value is 1.48900378495e-09 and best x is 0.00114191015171
mean time is 1546ms

But, this is still much slower than the rust version.

I don’t know how much of an impact it has, but your Rust version uses 32-bit floats while the OCaml version uses 64-bit floats. There are no 32-bit floats in OCaml, but you could switch the Rust version to f64 to see how it compares.

I have changed the float size from 32-bits to 64-bit, the measure result is nearly unchanged(from 900ms to 912ms).
result of 32-bits

best value is 0.00000000005988138 and best x is 0.0003912286
best value is 0.0000000000035196058 and best x is 0.00015211242
best value is 0.00000000057644883 and best x is 0.0008322496
best value is 0.000000000008696707 and best x is 0.00020564506
best value is 0.0000000000067636847 and best x is 0.00018911579
best value is 0.0000000010156723 and best x is 0.0010051971
best value is 0.000000000000001479852 and best x is 0.0000113956585
best value is 0.00000000046494394 and best x is 0.00077469996
best value is 0.000000000012882024 and best x is 0.00023442002
best value is 0.0000000021282005 and best x is 0.0012862859
mean time is 900ms

result of 64-bits

best value is 0.0000000058477668720127 and best x is 0.00180162064657216
best value is 0.0000000000026611724419478304 and best x is 0.00013857695232211142
best value is 0.00000000058150870335642 and best x is 0.0008346775636545467
best value is 0.000000000478984608281727 and best x is 0.0007824210378905012
best value is 0.000000000049074388830549084 and best x is 0.00036611565524501843
best value is 0.0000000005200740879052685 and best x is 0.0008041833405643376
best value is 0.00000000016031596668945342 and best x is 0.0005432406489841435
best value is 0.000000000048342858468375267 and best x is 0.00036428736532565057
best value is 0.0000000027258092748919238 and best x is 0.0013968994777376063
best value is 0.000000002512357508542904 and best x is 0.0013594413630785727
mean time is 912ms

There’s lots of non-idiomatic OCaml code in there, but the worse is perhaps

if Float.compare ratio g.crossRatio != 1 then ... else ...

which is a really obscure way of writing

if ratio <= g.crossRatio then .. else ...

and the compiled code is also a lot less efficient.

Also, my stomach still ache from your use of a full sort (O(n log n)) to extract the smallest element of an array (which is O(n)).

1 Like

I was able to somewhat improve your original code. On my machine your original code runs in ~3900ms, the original rust code runs in ~1300ms and my version below runs in ~1800ms. I kept the sorting since this is what the rust code also does, but replaced it with quicksort (for ease of writing) and inline the comparison function. I’m assuming you need the data to be sorted for some other processing that you removed from your sample code (otherwise, just scan the array to find the min). Also, you said you did not want to mutate the fields of Chrome.t in cross (the solution suggested by @Gopiandcode) because those record could be extracted. I still did it but I make a copy of the elements in the array before sorting, so updates only happen on newly allocated values that are not shared. If this is ok with you then I advise you also use in place swap in cross and copy the Chrome.t objects before storing them. If this is still not ok, then you can just change Chrome.swap in partition to just swap the two cells of the array, but you will gain less. I also tweaked the GC a bit which seems to help. If I fix the random seed, I get the exact same results as your original OCaml code.

(* compile with ocamlopt -inline 1000 -unsafe foo.ml *)

let () = Random.init 42 (* to compare results with original *)
let () = Gc.set { (Gc.get ()) with Gc.minor_heap_size = 1024 * 1024 * 16 }

let _MIN_X = 0.0
let _MAX_X = 10.0

(* [min, max[, also modified in the original code *)
let randInt ~min ~max = min + Random.int (max - min)

module Chrome = struct
  type t = { mutable x : float; mutable fitness : float }

  let compute_fitness x = x *. x *. x
  let init_x () = Random.float (_MAX_X -. _MIN_X) +. _MIN_X
  let init_with x = { x; fitness = compute_fitness x }
  let init () = init_with (init_x ())

  let cross c1 c2 =
    let median = (c1.x +. c2.x) *. 0.5 in
    let newC1 = init_with (Random.float (median -. _MIN_X) +. _MIN_X) in
    let newC2 = init_with (Random.float (_MAX_X -. median) +. median) in
    (newC1, newC2)

  let swap c1 c2 =
    let tmpx = c1.x in
    let tmpf = c1.fitness in
    c1.x <- c2.x;
    c1.fitness <- c2.fitness;
    c2.x <- tmpx;
    c2.fitness <- tmpf
end

module Gen = struct
  type t = {
    mutable chromes : Chrome.t array;
    genSize : int;
    genCount : int;
    mutable bestChrome : Chrome.t option;
    mutRatio : float;
    crossRatio : float;
  }

  let init g = g.chromes <- Array.init g.genSize (fun _i -> Chrome.init ())

  let mutate g =
    for i = 0 to g.genSize - 1 do
      let ratio = Random.float 1. in
      if ratio <= g.mutRatio then g.chromes.(i) <- Chrome.init ()
    done

  let cross g =
    for _i = 0 to g.genSize - 1 do
      let ratio = Random.float 1. in
      if ratio <= g.crossRatio then
        let i1, i2 =
          (randInt ~min:0 ~max:g.genSize, randInt ~min:0 ~max:g.genSize)
        in
        let c1, c2 = Chrome.cross g.chromes.(i1) g.chromes.(i2) in
        let () = g.chromes.(i1) <- c1 in
        g.chromes.(i2) <- c2
    done

  let rec quicksort arr low high =
    if low < high then begin
      let pivot_index =
        partition arr low high arr.(high).Chrome.fitness (low - 1)
      in
      quicksort arr low (pivot_index - 1);
      quicksort arr (pivot_index + 1) high
  end

  and partition arr j high pivot_value i =
    if j < high then
      let i =
        if arr.(j).Chrome.fitness <= pivot_value then (
          let i = i + 1 in
          Chrome.swap arr.(i) arr.(j);
          i)
        else i
      in
      partition arr (j + 1) high pivot_value i
    else
      let i = i + 1 in
      Chrome.swap arr.(i) arr.(j);
      i

  let quicksort arr =
    for i = 0 to Array.length arr - 1 do
      let c = arr.(i) in
      arr.(i) <- { c with Chrome.x = c.Chrome.x }
    done;
    quicksort arr 0 (Array.length arr - 1)

  let updateGen g =
    let () = quicksort g.chromes in
    g.bestChrome <- Some g.chromes.(0)

  let start g =
    for _i = 0 to g.genCount - 1 do
      mutate g;
      cross g;
      updateGen g
    done
end

let start () =
  let gen : Gen.t =
    {
      chromes = ([||] : Chrome.t array);
      genSize = 10000;
      genCount = 1000;
      bestChrome = None;
      mutRatio = 0.15;
      crossRatio = 0.65;
    }
  in
  let () = gen |> Gen.init in
  let () = gen |> Gen.start in
  match gen.bestChrome with
  | None -> ()
  | Some c ->
      print_endline
        ("best value is "
        ^ (c.fitness |> string_of_float)
        ^ " and best x is " ^ (c.x |> string_of_float))

let main () =
  let testCount = 10 in
  let timeStart = Sys.time () in
  let () =
    for _i = 0 to testCount - 1 do
      start ()
    done
  in
  let timeEnd = Sys.time () in
  print_endline
    ("mean time is "
    ^ (let timeUse =
         (timeEnd -. timeStart) *. 1000. /. (testCount |> float_of_int)
       in
       timeUse |> Float.round |> int_of_float |> string_of_int)
    ^ "ms")

let () = main ()
2 Likes

Do we have an ocaml library for fixed point arithmetic?
I have a use case where it would be nice to see if performance of very numerical
code can be accelerated using this.

I just found in opam:

hardcaml_fixed_point --          Hardcaml fixed point arithmetic
shine                --          Fixed-point MP3 encoder

There might be some interesting code in there.

2 Likes

I have already explained why here must use sortting.

Thanks for the reply, this is what i really need, thanks again!

An interesting thing, i converted your code into Rescript, replaced Random module with Math.random, the code is also very efficient. (1080ms of OCaml vs 1428ms of Rescript)
Here is the code:

/* compile with ocamlopt -inline 1000 -unsafe foo.ml */

// let () = Random.init(42) /* to compare results with original */

let _MIN_X = 0.0
let _MAX_X = 10.0

/* [min, max[, also modified in the original code */
// let randInt = (~min, ~max) => min + Random.int(max - min)

let randInt = (~min, ~max) => {
  let ratio = Js.Math.random()
  let value = (max->Belt.Int.toFloat -. min->Belt.Int.toFloat) *. ratio
  value->Js.Math.unsafe_floor_int + min
}

module Chrome = {
  type t = {mutable x: float, mutable fitness: float}

  let compute_fitness = x => x *. x *. x
  let init_x = () => Js.Math.random() *. (_MAX_X -. _MIN_X) +. _MIN_X
  let init_with = x => {x, fitness: compute_fitness(x)}
  let init = () => init_with(init_x())

  let cross = (c1, c2) => {
    let median = (c1.x +. c2.x) *. 0.5
    let newC1 = init_with(Js.Math.random() *. (median -. _MIN_X) +. _MIN_X)
    let newC2 = init_with(Js.Math.random() *. (_MAX_X -. median) +. median)
    (newC1, newC2)
  }

  let swap = (c1, c2) => {
    let tmpx = c1.x
    let tmpf = c1.fitness
    c1.x = c2.x
    c1.fitness = c2.fitness
    c2.x = tmpx
    c2.fitness = tmpf
  }
}

module Gen = {
  type t = {
    mutable chromes: array<Chrome.t>,
    genSize: int,
    genCount: int,
    mutable bestChrome: option<Chrome.t>,
    mutRatio: float,
    crossRatio: float,
  }

  let init = g => g.chromes = Array.init(g.genSize, _i => Chrome.init())

  let mutate = g =>
    for i in 0 to g.genSize - 1 {
      let ratio = Js.Math.random()
      if ratio <= g.mutRatio {
        g.chromes->Belt.Array.setUnsafe(i, Chrome.init())
      }
    }

  let cross = g =>
    for _i in 0 to g.genSize - 1 {
      // let ratio = Random.float(1.)
      let ratio = Js.Math.random()
      if ratio <= g.crossRatio {
        let (i1, i2) = (randInt(~min=0, ~max=g.genSize), randInt(~min=0, ~max=g.genSize))

        let (c1, c2) = Chrome.cross(
          g.chromes->Belt.Array.getUnsafe(i1),
          g.chromes->Belt.Array.getUnsafe(i2),
        )
        g.chromes->Belt.Array.setUnsafe(i1, c1)
        g.chromes->Belt.Array.setUnsafe(i2, c2)
      }
    }

  let rec quicksort = (arr, low, high) =>
    if low < high {
      let pivot_index = partition(
        arr,
        low,
        high,
        (arr->Belt.Array.getUnsafe(high)).Chrome.fitness,
        low - 1,
      )

      quicksort(arr, low, pivot_index - 1)
      quicksort(arr, pivot_index + 1, high)
    }

  and partition = (arr, j, high, pivot_value, i) =>
    if j < high {
      let i = if (arr->Belt.Array.getUnsafe(j)).Chrome.fitness <= pivot_value {
        let i = i + 1
        Chrome.swap(arr->Belt.Array.getUnsafe(i), arr->Belt.Array.getUnsafe(j))
        i
      } else {
        i
      }

      partition(arr, j + 1, high, pivot_value, i)
    } else {
      let i = i + 1
      Chrome.swap(arr->Belt.Array.getUnsafe(i), arr->Belt.Array.getUnsafe(j))
      i
    }

  let quicksort = arr => {
    for i in 0 to Array.length(arr) - 1 {
      let c = arr->Belt.Array.getUnsafe(i)
      arr->Belt.Array.setUnsafe(i, {...c, Chrome.x: c.Chrome.x})
    }
    quicksort(arr, 0, Array.length(arr) - 1)
  }

  let updateGen = g => {
    let () = quicksort(g.chromes)
    g.bestChrome = Some(g.chromes[0])
  }

  let start = g =>
    for _i in 0 to g.genCount - 1 {
      mutate(g)
      cross(g)
      updateGen(g)
    }
}

let start = () => {
  let gen: Gen.t = {
    chromes: ([]: array<Chrome.t>),
    genSize: 10000,
    genCount: 1000,
    bestChrome: None,
    mutRatio: 0.15,
    crossRatio: 0.65,
  }

  let () = gen->Gen.init
  let () = gen->Gen.start
  switch gen.bestChrome {
  | None => ()
  | Some(c) =>
    print_endline(
      "best value is " ++
      (c.fitness->Belt.Float.toString ++
      (" and best x is " ++ c.x->Belt.Float.toString)),
    )
  }
}

let main = () => {
  let testCount = 10
  let timeStart = Sys.time()
  let () = for _i in 0 to testCount - 1 {
    start()
  }

  let timeEnd = Sys.time()
  print_endline(
    "mean time is " ++
    ({
      let timeUse = (timeEnd -. timeStart) *. 1000. /. testCount->float_of_int

      timeUse->Js.Math.unsafe_round->string_of_int
    } ++
    "ms"),
  )
}

main()

result

best value is 1.632582792297675e-8 and best x is 0.0025368321805161677
best value is 3.189175730351589e-13 and best x is 0.00006832182890983782
best value is 3.624163295527082e-11 and best x is 0.0003309298322899977
best value is 5.699835597420906e-12 and best x is 0.0001786298813568937
best value is 3.517319828982728e-11 and best x is 0.0003276453082897697
best value is 2.5804167236638335e-9 and best x is 0.0013716078098534256
best value is 8.417935998460742e-9 and best x is 0.0020342385170957957
best value is 6.296942359306891e-10 and best x is 0.0008571231780193898
best value is 5.52048479362766e-10 and best x is 0.0008203371998875497
best value is 8.911625410063919e-10 and best x is 0.0009623188090053025
mean time is 1428ms

I’m guessing that the biggest loss in performances when using a predefined sort is that the extracted floating point values are boxed just after extraction to be passed to the comparison function. Manually inlining <= in the sort function allows the compiler to directly compare the extracted floats.
Fair warning: I used quicksort just as a quick and dirty hack (and also to experiment and see if a small enough sorting function would get inlined), but I did not put much thought into it. I was satisfied by seeing that on test runs it returned the same results as Array.sort but you should double check!
Also I made the assumption that it would be called once on what appears to be a random array. If you happen to call it on an already sorted array it may exhibit quadratic behaviour (i.e. it’s not the clever quicksort that shuffles and takes care of equal pivot values).

1 Like