It would be awesome to have a deriving directive that takes a set of types, and generates:

a signature whose items correspond to the product types or the variants of the sum types, and each item is a function whose arguments correspond to record or tuple fields of the product types or the variants; in case of records, the function arguments are labelled;

a function / functor taking a (first-class/) module of the above signature, and implementing the fold operation over the types.

Does anyone have such a PPX rewriter? If not, would you support a project to implement it?

I knew about the first-order features of ppx_jane: @@deriving variants and @@deriving fields, but I forgot that they have the higher-order part with Variants.fold!

Alas, not only the problem that ppx_variants_conv does not generate a signature of the sort I need, but also it is not the right notion of â€śfoldâ€ť. The whole Variants is about constructors, not about destructors AFAICT.

type _ calc =
| Lambda: {bind: 'b ref; body: 'a calc} -> ('b -> 'a) calc
| App: {func: ('a -> 'b) calc; arg: 'a calc} -> 'b calc
| Var: 'a ref -> 'a calc
| Const: 'a -> 'a calc
(* [@@deriving_inline fold_sig] *)
(* START of auto-generated code *)
module type FOLD_CALC = sig
type 'a result
val lambda: bind: 'b ref -> body: 'a result -> ('b -> 'a) result
val app: func:('a -> 'b) result -> arg: 'a result -> 'b result
val var: 'a ref -> 'a result
val const: 'a -> 'a result
end
module Fold(F: FOLD_CALC) = struct
let fold c =
let rec loop: 'a. 'a calc -> 'a F.result =
fun (type a) (calc: a calc) ->
match calc with
| Lambda {bind; body} -> (F.lambda ~bind ~body:(loop body) : a F.result)
| App {func; arg} -> F.app ~func:(loop func) ~arg:(loop arg)
| Var v -> F.var v
| Const c -> F.const c in
loop c
end
(* [@@@end] *)
(* END of auto-generated code *)
module Eval: FOLD_CALC = struct
type 'a result = 'a
let lambda ~bind ~body = fun x -> bind := x; body
let app ~func ~arg = func arg
let var v = !v
let const c = c
end
module EvalComp = Fold(Eval)
module PrintApTree: FOLD_CALC = struct
type 'a result = string
let lambda ~bind:_ ~body = "(fun <some var> -> "^body^")"
let app ~func ~arg = "("^func^" "^arg^")"
let var _ = "<some var>"
let const _ = "<some const>"
end
module PrintApTreeComp = Fold(PrintApTree)

Ah I know what you mean, multiple types together getting the signature generated. As in:

type 'a calc = [...]
and 'b arith = [...]

Thereâ€™s nothing extra interesting about multiple types, except making the task more cumbersome, because now there might be a calc_result type and an arith_result typeâ€¦ maybeâ€¦

type _ calc =
| Lambda: {bind: 'b ref; body: 'a calc} -> ('b -> 'a) calc
| App: {func: ('a -> 'b) calc; arg: 'a calc} -> 'b calc
| Var: 'a ref -> 'a calc
| Comp: 'a arith -> 'a calc
and _ arith =
| Add_int: int arith * int arith -> int arith
| Add_float: float arith * float arith -> float arith
| Const: 'a -> 'a arith
(* [@@deriving_inline fold_sig] *)
(* START of auto-generated code *)
module type FOLD_CALC = sig
type 'a calc_result
type 'a arith_result
val lambda: bind: 'b ref -> body: 'a calc_result -> ('b -> 'a) calc_result
val app: func:('a -> 'b) calc_result -> arg: 'a calc_result -> 'b calc_result
val var: 'a ref -> 'a calc_result
val comp: 'a arith_result -> 'a calc_result
val add_int: int arith_result -> int arith_result -> int arith_result
val add_float: float arith_result -> float arith_result -> float arith_result
val const: 'a -> 'a arith_result
end
module Fold(F: FOLD_CALC) = struct
let fold c =
let rec calc_loop: 'a. 'a calc -> 'a F.calc_result =
fun (type a) (calc: a calc) ->
match calc with
| Lambda {bind; body} -> (F.lambda ~bind ~body:(calc_loop body) : a F.calc_result)
| App {func; arg} -> F.app ~func:(calc_loop func) ~arg:(calc_loop arg)
| Var v -> F.var v
| Comp c -> F.comp (arith_loop c)
and arith_loop: 'a. 'a arith -> 'a F.arith_result =
fun (type a) (arith: a arith) ->
match arith with
| Add_int (a1, a2) -> (F.add_int (arith_loop a1) (arith_loop a2): a F.arith_result)
| Add_float (a1, a2) -> F.add_float (arith_loop a1) (arith_loop a2)
| Const c -> F.const c in
calc_loop c
end
(* [@@@end] *)
(* END of auto-generated code *)
module Eval: FOLD_CALC = struct
type 'a calc_result = 'a
type 'a arith_result = 'a
let lambda ~bind ~body = fun x -> bind := x; body
let app ~func ~arg = func arg
let var v = !v
let comp c = c
let add_int i1 i2 = Int.add i1 i2
let add_float i1 i2 = Float.add i1 i2
let const c = c
end
module EvalComp = Fold(Eval)
module PrintApTree: FOLD_CALC = struct
type 'a calc_result = string
type 'a arith_result = int
let lambda ~bind:_ ~body = "(fun <some var> -> "^body^")"
let app ~func ~arg = "("^func^" "^arg^")"
let var _ = "<some var>"
let comp c = "(added "^Int.to_string c^" elements)"
let add_int i1 i2 = i1 + i2
let add_float i1 i2 = i1 + i2
let const _ = 1
end
module PrintApTreeComp = Fold(PrintApTree)

I wonder: whatâ€™s the difference between this, and ppx.deriving.fold ? I will confess that I didnâ€™t test whether ppx.deriving.fold can handle more than one type, but at least for a single type, isnâ€™t it about the same (only, without a signature/functor â€“ just a plain old function) ?

Assuming that by ppx.deriving.fold you mean ppx_variants_conv.Variants.fold.

Letâ€™s look at the README example.

module Variant = struct
type 'constructor t = {
name : string;
rank : int;
constructor : 'constructor
}
end
type 'a t =
| A of 'a
| B of char
| C
| D of int * int
[@@deriving variants]
val fold :
init: 'b
-> a:('b -> ('a -> 'a t) Variant.t -> 'c)
-> b:('c -> (char -> 'a t) Variant.t -> 'd)
-> c:('d -> ('a t) Variant.t -> 'e)
-> d:('e -> (int -> int -> 'a t) Variant.t -> 'f)
-> 'f

The corresponding less-boilerplate signature is:

sig
type b
type c
type d
type e
type f
val a: b -> ('a -> 'a t) -> c
val b: c -> (char -> 'a t) -> d
val c: d -> ('a t) -> e
val d: e -> (int -> int -> 'a t) -> f
end

The functions a, b, c, d do not have access to the content of any information in t. Nay, the fold function itself does not take a value of type t. In other words, Variants.fold is not a fold at all. Folding means destructuring a data structure. Rather, it should be called Variants.with_variant_constructors â€“ its use is to facilitate constructing the data structure.

Oh, no, I meant ppx_deriving.fold. Itâ€™s part of the package ppx_deriving, IIRC. I"ve never heard of this Variants.fold to which you refer. Hereâ€™s an example (from the unit-test):

type 'a btree =
| Node of 'a btree * 'a * 'a btree
| Leaf [@@deriving fold]
let rec fold_btree poly_a acc =
function
| Node (a0, a1, a2) ->
let acc = let acc = (fold_btree poly_a) acc a0 in poly_a acc a1 in
(fold_btree poly_a) acc a2
| Leaf -> acc[@@ocaml.warning "-39"]

For clarity: the fold_btree function was generated. The actual source-code is just the type with the annotaiton.

Ah, indeed! I havenâ€™t had ppx_deriving installed but I had noticed it during googling and forgot about it. It is a proper â€śfoldâ€ť, but maybe it doesnâ€™t have all the functionality? (Converting record fields to labels and handling two types â€“ for the use case Iâ€™m working on.) And also, signatures are more convenient, for example itâ€™s easier to implement plugins. And moreover:

File "lib/dune", line 10, characters 18-30:
10 | (preprocess (pps ppx_deriving ppx_jane))
^^^^^^^^^^^^
Error: Ppx dependency on a non-ppx library "ppx_deriving". If "ppx_deriving"
is in fact a ppx rewriter library, it should have (kind ppx_rewriter) in its
dune file.

OK, I finally took a close look at the code from ppx_deriving.fold, and it â€¦ doesnâ€™t do what you want. Instead it folds over the leaves, and only the leaves. Iâ€™ll think about this; let me get back to you. This isnâ€™t a hard problem.

Itâ€™s not conceptually hard but Iâ€™m now inclined to think itâ€™s OK to write this code by hand, the exhaustiveness check on fold enforces correctness.

As i was thinking over your problem last night, I had a thought. Maybe this is wrong, but maybe not. I think what youâ€™re asking for is actually not a fold at all, but rather the induction combinator. Let me explain:

Suppose we take the type:

type t = O | S t

then whatâ€™s your â€śfoldâ€ť look like?

module type FOLD_T = sig
type result
val o : result
val s : result â†’ result
end

module Fold(F :FOLD_T)) = struct
let fold t =
let rec t_loop : t â†’ F.result =
fun (t : t) â†’
match t with
O â†’ F.o
| S t â†’ F.s (t_loop t)
in t_loop t
end

thatâ€™s the induction principle on the type t, right there. Letâ€™s write it without the functor:

t_induct :
res.
o:res
s:(res â†’ res)
t â†’ res

and it is implicit in the match on the type t: you recover the induction combinator by combining match with recursion, and applying the recursive function to all recursive instances.

So letâ€™s do type 'a btree = Leaf | Node of btree * 'a * btree

let bt_induct leaf node =
let rec btrec = function
Leaf â†’ leaf
| Node(l,v,r) â†’ node (btrec l) v (btrec r)
in btrec

Whatâ€™s the type of this?

b. b â†’ ('a btree â†’ 'a â†’ 'a btree â†’ b) â†’ 'a -btree â†’ b

To sum up: I think thereâ€™s little value in writing a PPX rewriter to produce this: you already have it in the match construct in ML â€“ thatâ€™s sort of the point of match.

Could your problem be solved by the visitors library? (manual: http://gallium.inria.fr/~fpottier/visitors/manual.pdf)
IIRC it provides a ppx_deriving plugin that generates classes following the â€śvisitorâ€ť pattern that you can use to traverse and fold over your datatype in various ways.