We did such micro benchmarking. We compared the standard library Map and Hash functors, with Base’s comparator-style maps and hashtables, and the same structures that were created using the Core functors.
We were using OCaml 4.09.0, Core v0.12.0, and tried three different compilation options, (1) without any optimization, (2) with -O3 and (3) with -O3 and flambda switch.
The main takeaways. The standard library Maps and Hashtables are consistently slightly faster (~10%) than their Janestreet counterparts without flambda and are significantly faster when flambda is enabled (nearly twice as faster).
The experiment
We did a very simple test (that was representative for our problem) that inserts N integers into the map, then lookups the same integers. Each test case is represented as <Source>.<Structure>.<Size>
, where <Source>
is either Caml
(the standard library), Base
the Base library, or Core
the functor interface, e.g., Map.Make(Int)
(we also tried the Map.Int
module but didn’t find a significant difference with Map.Make(Int)
).
Here are the numbers:
1) vanilla compiler, no specific optimization options.
┌────────────────────┬─────────────┬───────────────┬───────────────┬───────────────┬────────────┐
│ Name │ Time/Run │ mWd/Run │ mjWd/Run │ Prom/Run │ Percentage │
├────────────────────┼─────────────┼───────────────┼───────────────┼───────────────┼────────────┤
│ Caml.Map:16 │ 2.52us │ 371.02w │ │ │ │
│ Caml.Map:256 │ 90.51us │ 12_737.58w │ 42.08w │ 42.08w │ 0.10% │
│ Caml.Map:65536 │ 74_726.08us │ 6_440_940.17w │ 1_733_335.64w │ 1_733_335.64w │ 80.61% │
│ Base.Map:16 │ 2.39us │ 563.03w │ 0.13w │ 0.13w │ │
│ Base.Map:256 │ 91.12us │ 19_004.83w │ 53.00w │ 53.00w │ 0.10% │
│ Base.Map:65536 │ 82_342.61us │ 9_626_675.28w │ 1_786_030.13w │ 1_786_030.13w │ 88.82% │
│ Core.Map:16 │ 2.78us │ 557.03w │ 0.14w │ 0.14w │ │
│ Core.Map:256 │ 103.61us │ 18_998.83w │ 53.22w │ 53.22w │ 0.11% │
│ Core.Map:65536 │ 92_704.59us │ 9_626_671.81w │ 1_784_322.55w │ 1_784_322.55w │ 100.00% │
│ Caml.Hashtbl:16 │ 2.01us │ 177.01w │ │ │ │
│ Caml.Hashtbl:256 │ 43.79us │ 2_815.08w │ 12.40w │ 12.40w │ 0.05% │
│ Caml.Hashtbl:65536 │ 17_783.56us │ 590_934.33w │ 392_225.13w │ 262_163.13w │ 19.18% │
│ Base.Hashtbl:16 │ 2.16us │ 108.00w │ │ │ │
│ Base.Hashtbl:256 │ 50.56us │ 2_547.07w │ 16.21w │ 16.21w │ 0.05% │
│ Base.Hashtbl:65536 │ 21_912.52us │ 522_319.67w │ 651_433.70w │ 520_865.70w │ 23.64% │
│ Core.Hashtbl:16 │ 2.15us │ 100.00w │ │ │ │
│ Core.Hashtbl:256 │ 51.14us │ 2_539.07w │ 16.84w │ 16.84w │ 0.06% │
│ Core.Hashtbl:65536 │ 22_198.09us │ 522_311.89w │ 651_436.44w │ 520_868.44w │ 23.94% │
└────────────────────┴─────────────┴───────────────┴───────────────┴───────────────┴────────────┘
2) vanilla compiler, -O3 option
┌────────────────────┬─────────────┬───────────────┬───────────────┬───────────────┬────────────┐
│ Name │ Time/Run │ mWd/Run │ mjWd/Run │ Prom/Run │ Percentage │
├────────────────────┼─────────────┼───────────────┼───────────────┼───────────────┼────────────┤
│ Caml.Map:16 │ 2.63us │ 371.02w │ │ │ │
│ Caml.Map:256 │ 95.79us │ 12_737.58w │ 41.88w │ 41.88w │ 0.11% │
│ Caml.Map:65536 │ 77_185.38us │ 6_440_940.17w │ 1_733_335.64w │ 1_733_335.64w │ 89.80% │
│ Base.Map:16 │ 2.44us │ 563.03w │ 0.13w │ 0.13w │ │
│ Base.Map:256 │ 90.29us │ 19_004.83w │ 52.90w │ 52.90w │ 0.11% │
│ Base.Map:65536 │ 81_299.52us │ 9_626_675.08w │ 1_786_544.39w │ 1_786_544.39w │ 94.59% │
│ Core.Map:16 │ 2.69us │ 557.03w │ 0.14w │ 0.14w │ │
│ Core.Map:256 │ 101.39us │ 18_998.83w │ 52.94w │ 52.94w │ 0.12% │
│ Core.Map:65536 │ 85_951.43us │ 9_626_672.41w │ 1_782_706.67w │ 1_782_706.67w │ 100.00% │
│ Caml.Hashtbl:16 │ 1.95us │ 177.01w │ │ │ │
│ Caml.Hashtbl:256 │ 43.34us │ 2_815.08w │ 12.42w │ 12.42w │ 0.05% │
│ Caml.Hashtbl:65536 │ 18_014.40us │ 590_933.91w │ 392_225.34w │ 262_163.34w │ 20.96% │
│ Base.Hashtbl:16 │ 2.16us │ 108.00w │ │ │ │
│ Base.Hashtbl:256 │ 50.72us │ 2_547.07w │ 16.13w │ 16.13w │ 0.06% │
│ Base.Hashtbl:65536 │ 22_109.95us │ 522_319.57w │ 651_427.30w │ 520_859.30w │ 25.72% │
│ Core.Hashtbl:16 │ 2.14us │ 100.00w │ │ │ │
│ Core.Hashtbl:256 │ 49.79us │ 2_539.07w │ 16.84w │ 16.84w │ 0.06% │
│ Core.Hashtbl:65536 │ 21_654.90us │ 522_311.75w │ 651_438.06w │ 520_870.06w │ 25.19% │
└────────────────────┴─────────────┴───────────────┴───────────────┴───────────────┴────────────┘
3) flambda compiler, -O3
┌────────────────────┬─────────────────┬───────────────┬───────────────┬───────────────┬────────────┐
│ Name │ Time/Run │ mWd/Run │ mjWd/Run │ Prom/Run │ Percentage │
├────────────────────┼─────────────────┼───────────────┼───────────────┼───────────────┼────────────┤
│ Caml.Map:16 │ 882.79ns │ 360.02w │ │ │ │
│ Caml.Map:256 │ 39_562.08ns │ 12_726.58w │ 41.68w │ 41.68w │ 0.05% │
│ Caml.Map:65536 │ 53_809_874.70ns │ 6_440_930.79w │ 1_735_117.84w │ 1_735_117.84w │ 64.61% │
│ Base.Map:16 │ 1_815.46ns │ 546.03w │ 0.12w │ 0.12w │ │
│ Base.Map:256 │ 79_654.15ns │ 18_987.81w │ 53.00w │ 53.00w │ 0.10% │
│ Base.Map:65536 │ 79_160_991.15ns │ 9_626_659.73w │ 1_787_850.52w │ 1_787_850.52w │ 95.04% │
│ Core.Map:16 │ 2_102.74ns │ 546.03w │ 0.12w │ 0.12w │ │
│ Core.Map:256 │ 90_244.58ns │ 18_987.81w │ 52.78w │ 52.78w │ 0.11% │
│ Core.Map:65536 │ 83_290_490.19ns │ 9_626_659.10w │ 1_786_810.45w │ 1_786_810.45w │ 100.00% │
│ Caml.Hashtbl:16 │ 457.38ns │ 150.00w │ │ │ │
│ Caml.Hashtbl:256 │ 13_812.19ns │ 2_548.06w │ 11.49w │ 11.49w │ 0.02% │
│ Caml.Hashtbl:65536 │ 10_470_655.56ns │ 525_380.79w │ 392_212.18w │ 262_150.18w │ 12.57% │
│ Base.Hashtbl:16 │ 1_513.58ns │ 89.00w │ │ │ │
│ Base.Hashtbl:256 │ 40_135.58ns │ 2_528.09w │ 15.58w │ 15.58w │ 0.05% │
│ Base.Hashtbl:65536 │ 19_374_446.48ns │ 522_301.02w │ 650_583.30w │ 520_015.30w │ 23.26% │
│ Core.Hashtbl:16 │ 1_534.87ns │ 89.00w │ │ │ │
│ Core.Hashtbl:256 │ 40_766.55ns │ 2_528.09w │ 15.58w │ 15.58w │ 0.05% │
│ Core.Hashtbl:65536 │ 19_377_124.52ns │ 522_300.77w │ 650_544.29w │ 519_976.29w │ 23.26% │
└────────────────────┴─────────────────┴───────────────┴───────────────┴───────────────┴────────────┘
Conclusions
As we can see, with flambda both Map and Hash tables of the standard library are significantly faster, 53 ms vs 92 ms for maps and 10 ms vs 19 ms for Hashtbls. We can also see that Janestreet times didn’t change between flambda and non-flambda version hence the increased difference. We can’t really tell whether it is because flambda has more opportunities for inlining for functors or because the flambda version of the JS library was compiled without the optimization information, while the standard library build system is somehow specialized for flambda. (It is important to notice, that when an flambda switch is used but -ON
is not specified with N>=1
the generated code doesn’t differ in performance from the code generated by the normal compiler, therefore it is usually a good idea to set -O2 by default when packaging your project).
Source code
open! Core
open Core_bench
let benchmark = Bench.Test.create_indexed ~args:[ 16; 256; 65536 ]
module type Ints = sig
type t
val empty : unit -> t
val add : int -> t -> t
val mem : int -> t -> bool
end
module type Tests = sig
type t
val create : int array -> t
val count : int array -> t -> int
end
module MakeTest(Map : Ints) : Tests = struct
type t = Map.t
let create inputs =
let init = Map.empty () in
Array.fold inputs ~init ~f:(fun map x -> Map.add x map)
let count inputs map =
Array.fold inputs ~init:0 ~f:(fun sum x ->
if Map.mem x map then sum + 1 else sum)
end
module SM = MakeTest(struct
module Map = Caml.Map.Make(Int)
type t = unit Map.t
let empty () = Map.empty
let add k m = Map.add k () m
let mem = Map.mem
end)
module CM = MakeTest(struct
module Map = Map.Make(Int)
type t = unit Map.t
let empty () = Map.empty
let add k m = Map.set ~key:k ~data:() m
let mem k m = Map.mem m k
end)
module BM = MakeTest(struct
type t = unit Base.Map.M(Int).t
let empty () = Base.Map.empty (module Int)
let add k m = Base.Map.set ~key:k ~data:() m
let mem k m = Map.mem m k
end)
module BH = MakeTest(struct
type t = unit Base.Hashtbl.M(Int).t
let empty () = Base.Hashtbl.create ~size:16 (module Int)
let add k m = Base.Hashtbl.set m ~key:k ~data:(); m
let mem k m = Base.Hashtbl.mem m k
end)
module SH = MakeTest(struct
module H = Caml.Hashtbl.Make(Int)
type t = unit H.t
let empty () = H.create 16
let add k m = H.add m k (); m
let mem k m = H.mem m k
end)
module CH = MakeTest(struct
module H = Hashtbl.Make(Int)
type t = unit H.t
let empty () = H.create ~size:16 ()
let add k m = H.set m k (); m
let mem k m = H.mem m k
end)
let (>:) name (module Test : Tests) =
benchmark ~name @@ fun len ->
let input = Array.init len ~f:ident in
let random_state = Base.Random.State.make [|len|] in
Array.permute ~random_state input;
stage (fun () ->
let map = Test.create input in
assert (Test.count input map = Array.length input))
let () =
Command.run @@ Bench.make_command [
"Caml.Map" >: (module SM);
"Base.Map" >: (module BM);
"Core.Map" >: (module CM);
"Caml.Hashtbl" >: (module SH);
"Base.Hashtbl" >: (module BH);
"Core.Hashtbl" >: (module CH);
]
Compiled with
ocamlfind ocamlopt -O3 -thread -package core_bench bench.ml -linkpkg -o bench