let profile time _n times =
let total_time = ref 0. in
let res = ref 0 in
for i = 1 to times do
let start = time () in
res := !res + i;
let stop = time () in
total_time := !total_time +. (stop -. start)
done;
(!total_time, !res)
let sys_profile = profile Sys.time
let unix_profile = profile Unix.gettimeofday
let no_profile = profile (fun () -> 0.)
let clock_profile _n times =
let total_time = ref 0L in
let res = ref 0 in
for i = 1 to times do
let start = Mtime_clock.elapsed_ns () in
res := !res + i;
let stop = Mtime_clock.elapsed_ns () in
total_time := Int64.add !total_time (Int64.sub stop start)
done;
(!total_time, !res)
let counter_profile _n times =
let counter = Mtime_clock.counter () in
let total_time = ref Mtime.Span.zero in
let res = ref 0 in
for i = 1 to times do
let start = Mtime_clock.count counter in
res := !res + i;
let stop = Mtime_clock.count counter in
total_time := Mtime.Span.add !total_time (Mtime.Span.abs_diff stop start)
done;
(!total_time, !res)
type Runtime_events.User.tag += CustomSpan
let count_span =
Runtime_events.User.register "count.span" CustomSpan Runtime_events.Type.span
let span_event_handler, get_time =
let time = ref 0L in
( (fun _domain_id ts event value ->
match (Runtime_events.User.tag event, value) with
| CustomSpan, Runtime_events.Type.Begin ->
time := Runtime_events.Timestamp.to_int64 ts
| CustomSpan, End ->
time := Int64.sub (Runtime_events.Timestamp.to_int64 ts) !time
| _ -> ()),
fun () -> !time )
let runtime_event_profile _n times =
Runtime_events.start ();
let cursor = Runtime_events.create_cursor None in
let callbacks =
Runtime_events.(
Callbacks.create ()
|> Callbacks.add_user_event Type.span span_event_handler)
in
let total_time = ref 0L in
let res = ref 0 in
for i = 1 to times do
Runtime_events.User.write count_span Begin;
res := !res + i;
Runtime_events.User.write count_span End;
ignore (Runtime_events.read_poll cursor callbacks None);
total_time := Int64.add !total_time (get_time ())
done;
(!total_time, !res)
let () =
let times = try int_of_string Sys.argv.(2) with _ -> 100 in
let n = try int_of_string Sys.argv.(3) with _ -> 40 in
let total_time =
match Sys.argv.(1) with
| "unix" ->
let total_time, _ = unix_profile n times in
total_time *. 1_000_000_000.
| "sys" ->
let total_time, _ = sys_profile n times in
total_time *. 1_000_000_000.
| "clock" ->
let total_time, _ = clock_profile n times in
let total_time = Int64.to_float total_time in
total_time
| "counter" ->
let total_time, _ = counter_profile n times in
Mtime.Span.to_float_ns total_time
| "runtime" ->
let total_time, _ = runtime_event_profile n times in
let total_time = Int64.to_float total_time in
total_time
| _ -> no_profile n times |> fst
in
let mean_time = total_time /. float times in
Format.eprintf "total: %.2f, mean: %.2f@." total_time mean_time
Where I’m trying to profile the same simple addition to see which time computation is the fastest. Here are the results:
❯ hyperfine --warmup 3 -N -L profiler no,clock,counter,unix,runtime './_build/default/bin/main.exe {profiler} 5000000 30'
Benchmark 1: ./_build/default/bin/main.exe no 5000000 30
Time (mean ± σ): 14.6 ms ± 0.3 ms [User: 13.7 ms, System: 0.7 ms]
Range (min … max): 14.2 ms … 16.4 ms 202 runs
Benchmark 2: ./_build/default/bin/main.exe clock 5000000 30
Time (mean ± σ): 229.8 ms ± 1.7 ms [User: 227.2 ms, System: 0.8 ms]
Range (min … max): 227.7 ms … 232.8 ms 13 runs
Benchmark 3: ./_build/default/bin/main.exe counter 5000000 30
Time (mean ± σ): 243.8 ms ± 2.1 ms [User: 241.0 ms, System: 0.7 ms]
Range (min … max): 240.9 ms … 247.9 ms 12 runs
Benchmark 4: ./_build/default/bin/main.exe unix 5000000 30
Time (mean ± σ): 259.2 ms ± 1.3 ms [User: 256.0 ms, System: 1.0 ms]
Range (min … max): 257.6 ms … 261.7 ms 11 runs
Benchmark 5: ./_build/default/bin/main.exe sys 5000000 30
Time (mean ± σ): 3.632 s ± 0.028 s [User: 0.542 s, System: 3.069 s]
Range (min … max): 3.604 s … 3.688 s 10 runs
Benchmark 5: ./_build/default/bin/main.exe runtime 5000000 30
Time (mean ± σ): 2.791 s ± 0.041 s [User: 2.755 s, System: 0.017 s]
Range (min … max): 2.753 s … 2.850 s 10 runs
I may be doing it wrong but using custom runtime events is quite slow, I expected to not pay such a toll when using them. Is there a way of improving it or is this expected when using runtime events?
Not an answer to your question but a section on profiling was recently added to the manual. I don’t know if it exists in a rendered state somewhere yet but it will as soon as 5.4 is released.
So the issue here is that Runtime_events isn’t really designed for you to read from the ring buffer in a loop like that. The idea is that you write events frequently and then only read them periodically or in a different domain or even process. If you remove the read from within the loop then the performance is roughly the same as unix/clock/counter (though sys is still faster):
./_build/default/prog.exe no 500000 30 ran
135.18 ± 19.54 times faster than ./_build/default/prog.exe sys 500000 30
397.38 ± 57.87 times faster than ./_build/default/prog.exe runtime 500000 30
402.06 ± 58.32 times faster than ./_build/default/prog.exe unix 500000 30
405.97 ± 58.75 times faster than ./_build/default/prog.exe clock 500000 30
413.68 ± 59.74 times faster than ./_build/default/prog.exe counter 500000 30
Profiling shows that nearly all the time in runtime is spent in clock_gettime so the similarity is understandable.
Interestingly my numbers look quite different from the ones you have. What OS are you doing this on? (I am running this on an Ubuntu 24.04/Linux 6.11 system)
Ok so it looks like my polling is not fast enough with domains which leads to wrong results. The issue here is that I have a spawned domain that’s supposed to poll continuously but my operation is too fast and the ring-buffer reaches its end before I can pull it.
let my_op res i = res := !res + i
(* Unix.sleepf 0.01 *)
let profile time _n times =
let total_time = ref 0. in
let res = ref 0 in
for i = 1 to times do
let start = time () in
my_op res i;
let stop = time () in
total_time := !total_time +. (stop -. start)
done;
(!total_time, !res)
let sys_profile = profile Sys.time
let unix_profile = profile Unix.gettimeofday
let no_profile = profile (fun () -> 0.)
let clock_profile _n times =
let total_time = ref 0L in
let res = ref 0 in
for i = 1 to times do
let start = Mtime_clock.elapsed_ns () in
my_op res i;
let stop = Mtime_clock.elapsed_ns () in
total_time := Int64.add !total_time (Int64.sub stop start)
done;
(!total_time, !res)
let counter_profile _n times =
let counter = Mtime_clock.counter () in
let total_time = ref Mtime.Span.zero in
let res = ref 0 in
for i = 1 to times do
let start = Mtime_clock.count counter in
my_op res i;
let stop = Mtime_clock.count counter in
total_time := Mtime.Span.add !total_time (Mtime.Span.abs_diff stop start)
done;
(!total_time, !res)
type Runtime_events.User.tag += CustomSpan
type span = Begin | End
let to_int = function Begin -> 0 | End -> 1
let of_int = function 0 -> Begin | 1 -> End | _ -> assert false
let custom_span_encoding =
let encode buffer (count, span) =
Bytes.set_int64_ne buffer 0 count;
Bytes.set_int8 buffer 8 (to_int span);
9
in
let decode buffer _size =
let count = Bytes.get_int64_ne buffer 0 in
let span = Bytes.get_int8 buffer 8 |> of_int in
(count, span)
in
Runtime_events.Type.register ~encode ~decode
let count_span =
Runtime_events.User.register "count.span" CustomSpan custom_span_encoding
let span_event_handler, get_time =
let prev_time = ref 0L in
let total_time = ref 0L in
let prev_count = ref 0L in
( (fun _domain_id ts event value ->
match (Runtime_events.User.tag event, value) with
| CustomSpan, (count, Begin) ->
prev_time := Runtime_events.Timestamp.to_int64 ts;
prev_count := count
| CustomSpan, (count, End) ->
if not @@ Int64.equal !prev_count count then
failwith
(Printf.sprintf "Prev: %d, Current: %d" (Int64.to_int !prev_count)
(Int64.to_int count));
total_time :=
Int64.add !total_time
(Int64.sub (Runtime_events.Timestamp.to_int64 ts) !prev_time)
| _ -> ()),
fun () -> !total_time )
let runtime_event_profile _n times =
let res = ref 0 in
for i = 1 to times do
Runtime_events.User.write count_span (Int64.of_int i, Begin);
my_op res i;
Runtime_events.User.write count_span (Int64.of_int i, End)
done;
!res
let finished = ref false
let read_poll _pid _path =
let cursor = Runtime_events.create_cursor None in
let callbacks =
Runtime_events.(
Callbacks.create ()
|> Callbacks.add_user_event custom_span_encoding span_event_handler)
in
fun () ->
Fun.protect
~finally:(fun () ->
Runtime_events.read_poll cursor callbacks None |> ignore;
Runtime_events.free_cursor cursor)
(fun () ->
while not !finished do
Runtime_events.read_poll cursor callbacks None |> ignore
done;
get_time ())
let () =
Runtime_events.start ();
let times = try int_of_string Sys.argv.(2) with _ -> 100 in
let n = try int_of_string Sys.argv.(3) with _ -> 40 in
let total_time =
match Sys.argv.(1) with
| "unix" ->
let total_time, _ = unix_profile n times in
total_time *. 1_000_000_000.
| "sys" ->
let total_time, _ = sys_profile n times in
total_time *. 1_000_000_000.
| "clock" ->
let total_time, _ = clock_profile n times in
let total_time = Int64.to_float total_time in
total_time
| "counter" ->
let total_time, _ = counter_profile n times in
Mtime.Span.to_float_ns total_time
| "runtime" ->
let main_pid = Unix.getpid () in
let path = Runtime_events.path () |> Option.get in
let d = Domain.spawn (read_poll main_pid path) in
let _ = runtime_event_profile n times in
finished := true;
let total_time = Domain.join d in
total_time |> Int64.to_float
| _ -> no_profile n times |> fst
in
let mean_time = total_time /. float times in
Format.eprintf "total: %.2f, mean: %.2f@." total_time mean_time