I was experimenting with monads, and discovered that CPS monad performs slower than manual CPS transformation. It seems the case for OCaml, Racket and Chicken Scheme. In OCaml the difference can be 2-3x. In GHC they perform exactly the same
Do we have a reasonable explanation? Is it a missing optimization or an artifact of missing CPS-based inner representation? Maybe my code is wrong or I am measuring wrong thing?
Manual CPS
let fib_cps =
let rec helper n k =
if n <= 1 then k 1
else helper (n - 1) (fun p1 -> helper (n - 2) (fun p2 -> k (p1 + p2)))
in
fun n -> helper n Fun.id
CPS monad
module Cont =
struct
type ('a, 'b) cont = 'a -> 'b
type 'a t = { cont : 'b. ('a, 'b) cont -> 'b } [@@unboxed]
let return (x : 'a) = { cont = (fun k -> k x) } [@@inline always]
let ( >>= ) (x : 'a t) (f : 'a -> 'b t) : 'b t =
{ cont = (fun k -> x.cont (fun v -> (f v).cont k)) }
[@@inline always]
let error = failwith
let run_cont f { cont } = cont f [@@inline always]
module Syntax = struct
let ( let* ) = ( >>= )
end
end
let fib_cps_m n =
let open Cont in
let open Cont.Syntax in
let rec helper n =
if n <= 1 then return 1
else
let* l = helper (n - 1) in
let* r = helper (n - 2) in
return (l + r)
in
run_cont Fun.id (helper n)
Update: it is 4.14.1+flambda
-dlambda output
fib_cps/298 =
(letrec
(helper/299
(function n/300[int] k/301
(if (<= n/300 1) (apply k/301 1)
(apply helper/299 (- n/300 1)
(function p1/302[int]
(apply helper/299 (- n/300 2)
(function p2/303[int] (apply k/301 (+ p1/302 p2/303)))))))))
(function n/304[int] : int
(apply helper/299 n/304 (function prim/410 stub prim/410))))
fib_cps_m/311 =
(function n/313[int] : int
(letrec
(helper/314
(function n/315[int]
(if (<= n/315 1) (apply (field 0 Cont/297) 1)
(apply (field 0 (field 4 Cont/297))
(apply helper/314 (- n/315 1))
(function l/316[int]
(apply (field 0 (field 4 Cont/297))
(apply helper/314 (- n/315 2))
(function r/317[int]
(apply (field 0 Cont/297) (+ l/316 r/317)))))))))
(apply (field 3 Cont/297) (function prim/411 stub prim/411)
(apply helper/314 n/313))))
Update: flambda output in the repo