I have a version that stores the Chrome.t
fields (x
and fitness
) directly in a bigger array. It does improve performance a lot, but makes it impossible to use regular array functions on the structure (I had to copy the implementation of Array.stable_sort
and tweak it slightly to make everything work). I’ll try to clean it up a bit and will submit it.
Here is the code:
module Multi_float_array (P : sig
type elt
val size : int
val get : Float.Array.t -> int -> elt
val set : Float.Array.t -> int -> elt -> unit
val compare : elt -> elt -> int
end) = struct
type t = Float.Array.t
let make len = Float.Array.make (len * P.size) 0.
let length a = (Float.Array.length a) / P.size
let[@inline] get a i = (P.get[@inlined hint]) a (i * P.size)
let[@inline] set a i v = (P.set[@inlined hint]) a (i * P.size) v
let blit src src_ofs dst dst_ofs len =
Float.Array.blit src (src_ofs * P.size) dst (dst_ofs * P.size) (len * P.size)
let stable_sort a =
let cutoff = 5 in
let cmp = P.compare in
let merge src1ofs src1len src2 src2ofs src2len dst dstofs =
let src1r = src1ofs + src1len and src2r = src2ofs + src2len in
let rec loop i1 s1 i2 s2 d =
if cmp s1 s2 <= 0 then begin
set dst d s1;
let i1 = i1 + 1 in
if i1 < src1r then
loop i1 (get a i1) i2 s2 (d + 1)
else
blit src2 i2 dst (d + 1) (src2r - i2)
end else begin
set dst d s2;
let i2 = i2 + 1 in
if i2 < src2r then
loop i1 s1 i2 (get src2 i2) (d + 1)
else
blit a i1 dst (d + 1) (src1r - i1)
end
in loop src1ofs (get a src1ofs) src2ofs (get src2 src2ofs) dstofs;
in
let isortto srcofs dst dstofs len =
for i = 0 to len - 1 do
let e = (get a (srcofs + i)) in
let j = ref (dstofs + i - 1) in
while (!j >= dstofs && cmp (get dst !j) e > 0) do
set dst (!j + 1) (get dst !j);
decr j;
done;
set dst (!j + 1) e;
done;
in
let rec sortto srcofs dst dstofs len =
if len <= cutoff then isortto srcofs dst dstofs len else begin
let l1 = len / 2 in
let l2 = len - l1 in
sortto (srcofs + l1) dst (dstofs + l1) l2;
sortto srcofs a (srcofs + l2) l1;
merge (srcofs + l2) l1 dst (dstofs + l1) l2 dst dstofs;
end;
in
let l = length a in
if l <= cutoff then isortto 0 a 0 l else begin
let l1 = l / 2 in
let l2 = l - l1 in
let t = make l2 in
sortto l1 t 0 l2;
sortto 0 a l2 l1;
merge l2 l1 t 0 l2 a 0;
end
end[@@inline]
let _MIN_X = 0.
let _MAX_X = 10.
module Chrome = struct
type t = { x : float; fitness : float }
type elt = t
let compare {x = _; fitness = f1 } { x = _; fitness = f2 } =
Float.compare f1 f2
let size = 2
let get base pos =
{ x = Float.Array.get base pos;
fitness = Float.Array.get base (pos + 1)
}
let set base pos t =
Float.Array.set base pos t.x;
Float.Array.set base (pos + 1) t.fitness
let init ?(min = _MIN_X) ?(max = _MAX_X) () =
let x = Random.float (max -. min) +. min in
let fitness = x *. x *. x in
{ x; fitness }
let mutate () = init ()
let cross c1 c2 =
let median = (c1.x +. c2.x) /. 2. in
(init ~max:median (), init ~min:median ())
end
module Chrome_array = Multi_float_array(Chrome)
module Gen = struct
type t = {
chromes : Chrome_array.t;
genSize : int;
genCount : int;
mutable bestChrome : Chrome.t option;
mutRatio : float;
crossRatio : float;
}
let init ~genSize ~genCount ~mutRatio ~crossRatio =
let chromes = Chrome_array.make (genSize * Chrome.size) in
for pos = 0 to genSize - 1 do
Chrome_array.set chromes pos (Chrome.init ())
done;
{
chromes;
genSize;
genCount;
bestChrome = None;
mutRatio;
crossRatio;
}
let mutate g =
for i = 0 to g.genSize - 1 do
let ratio = Random.float 1. in
if ratio <= g.mutRatio then
let c = Chrome.mutate () in
Chrome_array.set g.chromes i c
done
let cross g =
for _i = 0 to g.genSize - 1 do
let ratio = Random.float 1. in
if ratio <= g.crossRatio then begin
let i1, i2 =
( Random.int g.genSize,
Random.int g.genSize )
in
let c1, c2 =
Chrome.cross
(Chrome_array.get g.chromes i1)
(Chrome_array.get g.chromes i2)
in
Chrome_array.set g.chromes i1 c1;
Chrome_array.set g.chromes i2 c2
end
done
let updateGen g =
Chrome_array.stable_sort g.chromes;
g.bestChrome <- Some (Chrome_array.get g.chromes 0)
let start g =
for _i = 0 to g.genCount - 1 do
let () = g |> mutate in
let () = g |> cross in
g |> updateGen
done
end
let start () =
let gen =
Gen.init ~genSize:10000 ~genCount:1000 ~mutRatio:0.15 ~crossRatio:0.65
in
let () = gen |> Gen.start in
match gen.bestChrome with
| None -> ()
| Some c ->
print_endline
("best value is "
^ (c.fitness |> string_of_float)
^ " and best x is " ^ (c.x |> string_of_float))
let main () =
let testCount = 10 in
let timeStart = Sys.time () in
let () =
for _i = 0 to testCount - 1 do
start ()
done
in
let timeEnd = Sys.time () in
print_endline
("mean time is "
^ (let timeUse =
(timeEnd -. timeStart) *. 1000. /. (testCount |> float_of_int)
in
timeUse |> Float.round |> int_of_float |> string_of_int)
^ "ms")
let () = main ()
The mean time on a sample run on my computer went from ~4s to ~2.5s.
Thanks for the reply, this optimization seems not worked in my computer, i just replaced Array.sort
with Array.stable_sort
of my old version of OCaml code, the mean time changed from 2700ms
to 2260ms
, and i also test your version, the mean time is 2403ms
. You can see the code here:
build.sh
ocamlopt -o gen.exe -O3 -unsafe gen.ml
ocamlopt -o fast_gen.exe -O3 -unsafe fast_gen.ml
gen.ml
let _MIN_X = 0.
let _MAX_X = 10.
(* [min, max] *)
let randInt ~min ~max = Random.int (max - min) + min
module Chrome = struct
type t = { mutable x : float; mutable fitness : float }
let init () = { x = Random.float (_MAX_X -. _MIN_X) +. _MIN_X; fitness = 0. }
let calculate c = c.fitness <- c.x *. c.x *. c.x
let mutate () =
let c = init () in
let () = calculate c in
c
let cross c1 c2 =
let median = (c1.x +. c2.x) *. 0.5 in
let newC1 = { x = 0.; fitness = 0. } in
let newC2 = { x = 0.; fitness = 0. } in
let () = newC1.x <- Random.float (median -. _MIN_X) +. _MIN_X in
let () = calculate newC1 in
let () = newC2.x <- Random.float (_MAX_X -. median) +. median in
let () = calculate newC2 in
(newC1, newC2)
end
module Gen = struct
type t = {
mutable chromes : Chrome.t array;
genSize : int;
genCount : int;
mutable bestChrome : Chrome.t option;
mutRatio : float;
crossRatio : float;
}
let init g =
g.chromes <-
Array.init g.genSize (fun _i ->
let c = Chrome.init () in
let () = Chrome.calculate c in
c)
let mutate g =
for i = 0 to g.genSize - 1 do
let ratio = Random.float 1. in
if Float.compare ratio g.mutRatio != 1 then
let c = Chrome.mutate () in
Array.set g.chromes i c
done
let cross g =
for _i = 0 to g.genSize - 1 do
let ratio = Random.float 1. in
if Float.compare ratio g.crossRatio != 1 then
let i1, i2 =
( randInt ~min:0 ~max:(g.genSize - 1),
randInt ~min:0 ~max:(g.genSize - 1) )
in
let c1, c2 = Chrome.cross g.chromes.(i1) g.chromes.(i2) in
let () = g.chromes.(i1) <- c1 in
g.chromes.(i2) <- c2
done
let updateGen g =
let () =
Array.stable_sort
(fun (c1 : Chrome.t) (c2 : Chrome.t) ->
Float.compare c1.fitness c2.fitness)
g.chromes
in
g.bestChrome <- Some g.chromes.(0)
let start g =
for _i = 0 to g.genCount - 1 do
let () = mutate g in
let () = cross g in
g |> updateGen
done
end
let start () =
let gen : Gen.t =
{
chromes = ([||] : Chrome.t array);
genSize = 10000;
genCount = 1000;
bestChrome = None;
mutRatio = 0.15;
crossRatio = 0.65;
}
in
let () = gen |> Gen.init in
let () = gen |> Gen.start in
match gen.bestChrome with
| None -> ()
| Some c ->
Stdlib.prerr_endline
("best value is "
^ (c.fitness |> Float.to_string)
^ " and best x is " ^ (c.x |> Float.to_string))
let main () =
let testCount = 10 in
let timeStart = Sys.time () in
let () =
for _i = 0 to testCount - 1 do
start ()
done
in
let timeEnd = Sys.time () in
Stdlib.print_endline
("mean time is "
^ (let timeUse =
(timeEnd -. timeStart) *. 1000. /. (testCount |> Float.of_int)
in
timeUse |> Float.round |> Int.of_float |> Int.to_string)
^ "ms")
let () = main ()
fast_gen.ml(your code)
module Multi_float_array (P : sig
type elt
val size : int
val get : Float.Array.t -> int -> elt
val set : Float.Array.t -> int -> elt -> unit
val compare : elt -> elt -> int
end) =
struct
type t = Float.Array.t
let make len = Float.Array.make (len * P.size) 0.
let length a = Float.Array.length a / P.size
let[@inline] get a i = (P.get [@inlined hint]) a (i * P.size)
let[@inline] set a i v = (P.set [@inlined hint]) a (i * P.size) v
let blit src src_ofs dst dst_ofs len =
Float.Array.blit src (src_ofs * P.size) dst (dst_ofs * P.size) (len * P.size)
let stable_sort a =
let cutoff = 5 in
let cmp = P.compare in
let merge src1ofs src1len src2 src2ofs src2len dst dstofs =
let src1r = src1ofs + src1len and src2r = src2ofs + src2len in
let rec loop i1 s1 i2 s2 d =
if cmp s1 s2 <= 0 then (
set dst d s1;
let i1 = i1 + 1 in
if i1 < src1r then loop i1 (get a i1) i2 s2 (d + 1)
else blit src2 i2 dst (d + 1) (src2r - i2))
else (
set dst d s2;
let i2 = i2 + 1 in
if i2 < src2r then loop i1 s1 i2 (get src2 i2) (d + 1)
else blit a i1 dst (d + 1) (src1r - i1))
in
loop src1ofs (get a src1ofs) src2ofs (get src2 src2ofs) dstofs
in
let isortto srcofs dst dstofs len =
for i = 0 to len - 1 do
let e = get a (srcofs + i) in
let j = ref (dstofs + i - 1) in
while !j >= dstofs && cmp (get dst !j) e > 0 do
set dst (!j + 1) (get dst !j);
decr j
done;
set dst (!j + 1) e
done
in
let rec sortto srcofs dst dstofs len =
if len <= cutoff then isortto srcofs dst dstofs len
else
let l1 = len / 2 in
let l2 = len - l1 in
sortto (srcofs + l1) dst (dstofs + l1) l2;
sortto srcofs a (srcofs + l2) l1;
merge (srcofs + l2) l1 dst (dstofs + l1) l2 dst dstofs
in
let l = length a in
if l <= cutoff then isortto 0 a 0 l
else
let l1 = l / 2 in
let l2 = l - l1 in
let t = make l2 in
sortto l1 t 0 l2;
sortto 0 a l2 l1;
merge l2 l1 t 0 l2 a 0
end
[@@inline]
let _MIN_X = 0.
let _MAX_X = 10.
module Chrome = struct
type t = { x : float; fitness : float }
type elt = t
let compare { x = _; fitness = f1 } { x = _; fitness = f2 } =
Float.compare f1 f2
let size = 2
let get base pos =
{ x = Float.Array.get base pos; fitness = Float.Array.get base (pos + 1) }
let set base pos t =
Float.Array.set base pos t.x;
Float.Array.set base (pos + 1) t.fitness
let init ?(min = _MIN_X) ?(max = _MAX_X) () =
let x = Random.float (max -. min) +. min in
let fitness = x *. x *. x in
{ x; fitness }
let mutate () = init ()
let cross c1 c2 =
let median = (c1.x +. c2.x) /. 2. in
(init ~max:median (), init ~min:median ())
end
module Chrome_array = Multi_float_array (Chrome)
module Gen = struct
type t = {
chromes : Chrome_array.t;
genSize : int;
genCount : int;
mutable bestChrome : Chrome.t option;
mutRatio : float;
crossRatio : float;
}
let init ~genSize ~genCount ~mutRatio ~crossRatio =
let chromes = Chrome_array.make (genSize * Chrome.size) in
for pos = 0 to genSize - 1 do
Chrome_array.set chromes pos (Chrome.init ())
done;
{ chromes; genSize; genCount; bestChrome = None; mutRatio; crossRatio }
let mutate g =
for i = 0 to g.genSize - 1 do
let ratio = Random.float 1. in
if ratio <= g.mutRatio then
let c = Chrome.mutate () in
Chrome_array.set g.chromes i c
done
let cross g =
for _i = 0 to g.genSize - 1 do
let ratio = Random.float 1. in
if ratio <= g.crossRatio then (
let i1, i2 = (Random.int g.genSize, Random.int g.genSize) in
let c1, c2 =
Chrome.cross
(Chrome_array.get g.chromes i1)
(Chrome_array.get g.chromes i2)
in
Chrome_array.set g.chromes i1 c1;
Chrome_array.set g.chromes i2 c2)
done
let updateGen g =
Chrome_array.stable_sort g.chromes;
g.bestChrome <- Some (Chrome_array.get g.chromes 0)
let start g =
for _i = 0 to g.genCount - 1 do
let () = g |> mutate in
let () = g |> cross in
g |> updateGen
done
end
let start () =
let gen =
Gen.init ~genSize:10000 ~genCount:1000 ~mutRatio:0.15 ~crossRatio:0.65
in
let () = gen |> Gen.start in
match gen.bestChrome with
| None -> ()
| Some c ->
print_endline
("best value is "
^ (c.fitness |> string_of_float)
^ " and best x is " ^ (c.x |> string_of_float))
let main () =
let testCount = 10 in
let timeStart = Sys.time () in
let () =
for _i = 0 to testCount - 1 do
start ()
done
in
let timeEnd = Sys.time () in
print_endline
("mean time is "
^ (let timeUse =
(timeEnd -. timeStart) *. 1000. /. (testCount |> float_of_int)
in
timeUse |> Float.round |> int_of_float |> string_of_int)
^ "ms")
let () = main ()
result of gen.ml
best value is 5.2671890462e-12 and best x is 0.000173990806823
best value is 2.39369249382e-17 and best x is 2.88196997976e-06
best value is 3.50392541523e-10 and best x is 0.00070499323738
best value is 1.04521736742e-09 and best x is 0.00101485081727
best value is 7.1120663069e-11 and best x is 0.000414316216726
best value is 6.28148092479e-12 and best x is 0.000184510328369
best value is 6.24688116892e-10 and best x is 0.000854845732979
best value is 7.74849079166e-10 and best x is 0.000918485646354
best value is 1.21257054376e-08 and best x is 0.00229739499884
best value is 1.97807675276e-10 and best x is 0.000582658893358
mean time is 2262ms
result of fast_gen.ml
best value is 1.15352994262e-08 and best x is 0.00225948584032
best value is 1.54486195232e-09 and best x is 0.00115601435158
best value is 2.48326882897e-11 and best x is 0.000291748016684
best value is 9.96224117638e-11 and best x is 0.000463573943407
best value is 1.16131699968e-11 and best x is 0.000226455876198
best value is 4.33358971316e-11 and best x is 0.000351249674583
best value is 1.80064614901e-07 and best x is 0.00564689170301
best value is 1.4520886812e-10 and best x is 0.000525610920713
best value is 1.89132959597e-10 and best x is 0.00057401389607
best value is 1.48900378495e-09 and best x is 0.00114191015171
mean time is 2403ms
Are you compiling this with flambda
as initially suggested by @lukstafi?
opam switch create 4.14.1+flambda ocaml.4.14.1 ocaml-option-flambda
I took your initial version and replaced randInt
calls with Random.int 1000
as @vlaviron suggested. ocamlopt -o gen.exe -O2 gen.ml
under flambda
then gave me significant speed-ups (I’m on a slower machine than you though)
I changed into 4.14.1+flambda, it is indeed faster than my old version and the Float.Array
version, here is the result:
gen.ml
best value is 5.2671890462e-12 and best x is 0.000173990806823
best value is 2.39369249382e-17 and best x is 2.88196997976e-06
best value is 3.50392541523e-10 and best x is 0.00070499323738
best value is 1.04521736742e-09 and best x is 0.00101485081727
best value is 7.1120663069e-11 and best x is 0.000414316216726
best value is 6.28148092479e-12 and best x is 0.000184510328369
best value is 6.24688116892e-10 and best x is 0.000854845732979
best value is 7.74849079166e-10 and best x is 0.000918485646354
best value is 1.21257054376e-08 and best x is 0.00229739499884
best value is 1.97807675276e-10 and best x is 0.000582658893358
mean time is 1960ms
fast_gen.ml
best value is 1.15352994262e-08 and best x is 0.00225948584032
best value is 1.54486195232e-09 and best x is 0.00115601435158
best value is 2.48326882897e-11 and best x is 0.000291748016684
best value is 9.96224117638e-11 and best x is 0.000463573943407
best value is 1.16131699968e-11 and best x is 0.000226455876198
best value is 4.33358971316e-11 and best x is 0.000351249674583
best value is 1.80064614901e-07 and best x is 0.00564689170301
best value is 1.4520886812e-10 and best x is 0.000525610920713
best value is 1.89132959597e-10 and best x is 0.00057401389607
best value is 1.48900378495e-09 and best x is 0.00114191015171
mean time is 1546ms
But, this is still much slower than the rust version.
I don’t know how much of an impact it has, but your Rust version uses 32-bit floats while the OCaml version uses 64-bit floats. There are no 32-bit floats in OCaml, but you could switch the Rust version to f64
to see how it compares.
I have changed the float size from 32-bits to 64-bit, the measure result is nearly unchanged(from 900ms to 912ms).
result of 32-bits
best value is 0.00000000005988138 and best x is 0.0003912286
best value is 0.0000000000035196058 and best x is 0.00015211242
best value is 0.00000000057644883 and best x is 0.0008322496
best value is 0.000000000008696707 and best x is 0.00020564506
best value is 0.0000000000067636847 and best x is 0.00018911579
best value is 0.0000000010156723 and best x is 0.0010051971
best value is 0.000000000000001479852 and best x is 0.0000113956585
best value is 0.00000000046494394 and best x is 0.00077469996
best value is 0.000000000012882024 and best x is 0.00023442002
best value is 0.0000000021282005 and best x is 0.0012862859
mean time is 900ms
result of 64-bits
best value is 0.0000000058477668720127 and best x is 0.00180162064657216
best value is 0.0000000000026611724419478304 and best x is 0.00013857695232211142
best value is 0.00000000058150870335642 and best x is 0.0008346775636545467
best value is 0.000000000478984608281727 and best x is 0.0007824210378905012
best value is 0.000000000049074388830549084 and best x is 0.00036611565524501843
best value is 0.0000000005200740879052685 and best x is 0.0008041833405643376
best value is 0.00000000016031596668945342 and best x is 0.0005432406489841435
best value is 0.000000000048342858468375267 and best x is 0.00036428736532565057
best value is 0.0000000027258092748919238 and best x is 0.0013968994777376063
best value is 0.000000002512357508542904 and best x is 0.0013594413630785727
mean time is 912ms
There’s lots of non-idiomatic OCaml code in there, but the worse is perhaps
if Float.compare ratio g.crossRatio != 1 then ... else ...
which is a really obscure way of writing
if ratio <= g.crossRatio then .. else ...
and the compiled code is also a lot less efficient.
Also, my stomach still ache from your use of a full sort (O(n log n)) to extract the smallest element of an array (which is O(n)).
I was able to somewhat improve your original code. On my machine your original code runs in ~3900ms, the original rust code runs in ~1300ms and my version below runs in ~1800ms. I kept the sorting since this is what the rust code also does, but replaced it with quicksort
(for ease of writing) and inline the comparison function. I’m assuming you need the data to be sorted for some other processing that you removed from your sample code (otherwise, just scan the array to find the min). Also, you said you did not want to mutate the fields of Chrome.t
in cross
(the solution suggested by @Gopiandcode) because those record could be extracted. I still did it but I make a copy of the elements in the array before sorting, so updates only happen on newly allocated values that are not shared. If this is ok with you then I advise you also use in place swap in cross
and copy the Chrome.t
objects before storing them. If this is still not ok, then you can just change Chrome.swap
in partition
to just swap the two cells of the array, but you will gain less. I also tweaked the GC a bit which seems to help. If I fix the random seed, I get the exact same results as your original OCaml code.
(* compile with ocamlopt -inline 1000 -unsafe foo.ml *)
let () = Random.init 42 (* to compare results with original *)
let () = Gc.set { (Gc.get ()) with Gc.minor_heap_size = 1024 * 1024 * 16 }
let _MIN_X = 0.0
let _MAX_X = 10.0
(* [min, max[, also modified in the original code *)
let randInt ~min ~max = min + Random.int (max - min)
module Chrome = struct
type t = { mutable x : float; mutable fitness : float }
let compute_fitness x = x *. x *. x
let init_x () = Random.float (_MAX_X -. _MIN_X) +. _MIN_X
let init_with x = { x; fitness = compute_fitness x }
let init () = init_with (init_x ())
let cross c1 c2 =
let median = (c1.x +. c2.x) *. 0.5 in
let newC1 = init_with (Random.float (median -. _MIN_X) +. _MIN_X) in
let newC2 = init_with (Random.float (_MAX_X -. median) +. median) in
(newC1, newC2)
let swap c1 c2 =
let tmpx = c1.x in
let tmpf = c1.fitness in
c1.x <- c2.x;
c1.fitness <- c2.fitness;
c2.x <- tmpx;
c2.fitness <- tmpf
end
module Gen = struct
type t = {
mutable chromes : Chrome.t array;
genSize : int;
genCount : int;
mutable bestChrome : Chrome.t option;
mutRatio : float;
crossRatio : float;
}
let init g = g.chromes <- Array.init g.genSize (fun _i -> Chrome.init ())
let mutate g =
for i = 0 to g.genSize - 1 do
let ratio = Random.float 1. in
if ratio <= g.mutRatio then g.chromes.(i) <- Chrome.init ()
done
let cross g =
for _i = 0 to g.genSize - 1 do
let ratio = Random.float 1. in
if ratio <= g.crossRatio then
let i1, i2 =
(randInt ~min:0 ~max:g.genSize, randInt ~min:0 ~max:g.genSize)
in
let c1, c2 = Chrome.cross g.chromes.(i1) g.chromes.(i2) in
let () = g.chromes.(i1) <- c1 in
g.chromes.(i2) <- c2
done
let rec quicksort arr low high =
if low < high then begin
let pivot_index =
partition arr low high arr.(high).Chrome.fitness (low - 1)
in
quicksort arr low (pivot_index - 1);
quicksort arr (pivot_index + 1) high
end
and partition arr j high pivot_value i =
if j < high then
let i =
if arr.(j).Chrome.fitness <= pivot_value then (
let i = i + 1 in
Chrome.swap arr.(i) arr.(j);
i)
else i
in
partition arr (j + 1) high pivot_value i
else
let i = i + 1 in
Chrome.swap arr.(i) arr.(j);
i
let quicksort arr =
for i = 0 to Array.length arr - 1 do
let c = arr.(i) in
arr.(i) <- { c with Chrome.x = c.Chrome.x }
done;
quicksort arr 0 (Array.length arr - 1)
let updateGen g =
let () = quicksort g.chromes in
g.bestChrome <- Some g.chromes.(0)
let start g =
for _i = 0 to g.genCount - 1 do
mutate g;
cross g;
updateGen g
done
end
let start () =
let gen : Gen.t =
{
chromes = ([||] : Chrome.t array);
genSize = 10000;
genCount = 1000;
bestChrome = None;
mutRatio = 0.15;
crossRatio = 0.65;
}
in
let () = gen |> Gen.init in
let () = gen |> Gen.start in
match gen.bestChrome with
| None -> ()
| Some c ->
print_endline
("best value is "
^ (c.fitness |> string_of_float)
^ " and best x is " ^ (c.x |> string_of_float))
let main () =
let testCount = 10 in
let timeStart = Sys.time () in
let () =
for _i = 0 to testCount - 1 do
start ()
done
in
let timeEnd = Sys.time () in
print_endline
("mean time is "
^ (let timeUse =
(timeEnd -. timeStart) *. 1000. /. (testCount |> float_of_int)
in
timeUse |> Float.round |> int_of_float |> string_of_int)
^ "ms")
let () = main ()
Do we have an ocaml library for fixed point arithmetic?
I have a use case where it would be nice to see if performance of very numerical
code can be accelerated using this.
I just found in opam:
hardcaml_fixed_point -- Hardcaml fixed point arithmetic
shine -- Fixed-point MP3 encoder
There might be some interesting code in there.
I have already explained why here must use sortting.
Thanks for the reply, this is what i really need, thanks again!
An interesting thing, i converted your code into Rescript, replaced Random
module with Math.random
, the code is also very efficient. (1080ms of OCaml vs 1428ms of Rescript)
Here is the code:
/* compile with ocamlopt -inline 1000 -unsafe foo.ml */
// let () = Random.init(42) /* to compare results with original */
let _MIN_X = 0.0
let _MAX_X = 10.0
/* [min, max[, also modified in the original code */
// let randInt = (~min, ~max) => min + Random.int(max - min)
let randInt = (~min, ~max) => {
let ratio = Js.Math.random()
let value = (max->Belt.Int.toFloat -. min->Belt.Int.toFloat) *. ratio
value->Js.Math.unsafe_floor_int + min
}
module Chrome = {
type t = {mutable x: float, mutable fitness: float}
let compute_fitness = x => x *. x *. x
let init_x = () => Js.Math.random() *. (_MAX_X -. _MIN_X) +. _MIN_X
let init_with = x => {x, fitness: compute_fitness(x)}
let init = () => init_with(init_x())
let cross = (c1, c2) => {
let median = (c1.x +. c2.x) *. 0.5
let newC1 = init_with(Js.Math.random() *. (median -. _MIN_X) +. _MIN_X)
let newC2 = init_with(Js.Math.random() *. (_MAX_X -. median) +. median)
(newC1, newC2)
}
let swap = (c1, c2) => {
let tmpx = c1.x
let tmpf = c1.fitness
c1.x = c2.x
c1.fitness = c2.fitness
c2.x = tmpx
c2.fitness = tmpf
}
}
module Gen = {
type t = {
mutable chromes: array<Chrome.t>,
genSize: int,
genCount: int,
mutable bestChrome: option<Chrome.t>,
mutRatio: float,
crossRatio: float,
}
let init = g => g.chromes = Array.init(g.genSize, _i => Chrome.init())
let mutate = g =>
for i in 0 to g.genSize - 1 {
let ratio = Js.Math.random()
if ratio <= g.mutRatio {
g.chromes->Belt.Array.setUnsafe(i, Chrome.init())
}
}
let cross = g =>
for _i in 0 to g.genSize - 1 {
// let ratio = Random.float(1.)
let ratio = Js.Math.random()
if ratio <= g.crossRatio {
let (i1, i2) = (randInt(~min=0, ~max=g.genSize), randInt(~min=0, ~max=g.genSize))
let (c1, c2) = Chrome.cross(
g.chromes->Belt.Array.getUnsafe(i1),
g.chromes->Belt.Array.getUnsafe(i2),
)
g.chromes->Belt.Array.setUnsafe(i1, c1)
g.chromes->Belt.Array.setUnsafe(i2, c2)
}
}
let rec quicksort = (arr, low, high) =>
if low < high {
let pivot_index = partition(
arr,
low,
high,
(arr->Belt.Array.getUnsafe(high)).Chrome.fitness,
low - 1,
)
quicksort(arr, low, pivot_index - 1)
quicksort(arr, pivot_index + 1, high)
}
and partition = (arr, j, high, pivot_value, i) =>
if j < high {
let i = if (arr->Belt.Array.getUnsafe(j)).Chrome.fitness <= pivot_value {
let i = i + 1
Chrome.swap(arr->Belt.Array.getUnsafe(i), arr->Belt.Array.getUnsafe(j))
i
} else {
i
}
partition(arr, j + 1, high, pivot_value, i)
} else {
let i = i + 1
Chrome.swap(arr->Belt.Array.getUnsafe(i), arr->Belt.Array.getUnsafe(j))
i
}
let quicksort = arr => {
for i in 0 to Array.length(arr) - 1 {
let c = arr->Belt.Array.getUnsafe(i)
arr->Belt.Array.setUnsafe(i, {...c, Chrome.x: c.Chrome.x})
}
quicksort(arr, 0, Array.length(arr) - 1)
}
let updateGen = g => {
let () = quicksort(g.chromes)
g.bestChrome = Some(g.chromes[0])
}
let start = g =>
for _i in 0 to g.genCount - 1 {
mutate(g)
cross(g)
updateGen(g)
}
}
let start = () => {
let gen: Gen.t = {
chromes: ([]: array<Chrome.t>),
genSize: 10000,
genCount: 1000,
bestChrome: None,
mutRatio: 0.15,
crossRatio: 0.65,
}
let () = gen->Gen.init
let () = gen->Gen.start
switch gen.bestChrome {
| None => ()
| Some(c) =>
print_endline(
"best value is " ++
(c.fitness->Belt.Float.toString ++
(" and best x is " ++ c.x->Belt.Float.toString)),
)
}
}
let main = () => {
let testCount = 10
let timeStart = Sys.time()
let () = for _i in 0 to testCount - 1 {
start()
}
let timeEnd = Sys.time()
print_endline(
"mean time is " ++
({
let timeUse = (timeEnd -. timeStart) *. 1000. /. testCount->float_of_int
timeUse->Js.Math.unsafe_round->string_of_int
} ++
"ms"),
)
}
main()
result
best value is 1.632582792297675e-8 and best x is 0.0025368321805161677
best value is 3.189175730351589e-13 and best x is 0.00006832182890983782
best value is 3.624163295527082e-11 and best x is 0.0003309298322899977
best value is 5.699835597420906e-12 and best x is 0.0001786298813568937
best value is 3.517319828982728e-11 and best x is 0.0003276453082897697
best value is 2.5804167236638335e-9 and best x is 0.0013716078098534256
best value is 8.417935998460742e-9 and best x is 0.0020342385170957957
best value is 6.296942359306891e-10 and best x is 0.0008571231780193898
best value is 5.52048479362766e-10 and best x is 0.0008203371998875497
best value is 8.911625410063919e-10 and best x is 0.0009623188090053025
mean time is 1428ms
I’m guessing that the biggest loss in performances when using a predefined sort is that the extracted floating point values are boxed just after extraction to be passed to the comparison function. Manually inlining <=
in the sort function allows the compiler to directly compare the extracted floats.
Fair warning: I used quicksort just as a quick and dirty hack (and also to experiment and see if a small enough sorting function would get inlined), but I did not put much thought into it. I was satisfied by seeing that on test runs it returned the same results as Array.sort
but you should double check!
Also I made the assumption that it would be called once on what appears to be a random array. If you happen to call it on an already sorted array it may exhibit quadratic behaviour (i.e. it’s not the clever quicksort that shuffles and takes care of equal pivot values).