It would be awesome to have a deriving directive that takes a set of types, and generates:
a signature whose items correspond to the product types or the variants of the sum types, and each item is a function whose arguments correspond to record or tuple fields of the product types or the variants; in case of records, the function arguments are labelled;
a function / functor taking a (first-class/) module of the above signature, and implementing the fold operation over the types.
Does anyone have such a PPX rewriter? If not, would you support a project to implement it?
I knew about the first-order features of ppx_jane: @@deriving variants and @@deriving fields, but I forgot that they have the higher-order part with Variants.fold!
Alas, not only the problem that ppx_variants_conv does not generate a signature of the sort I need, but also it is not the right notion of “fold”. The whole Variants is about constructors, not about destructors AFAICT.
type _ calc =
| Lambda: {bind: 'b ref; body: 'a calc} -> ('b -> 'a) calc
| App: {func: ('a -> 'b) calc; arg: 'a calc} -> 'b calc
| Var: 'a ref -> 'a calc
| Const: 'a -> 'a calc
(* [@@deriving_inline fold_sig] *)
(* START of auto-generated code *)
module type FOLD_CALC = sig
type 'a result
val lambda: bind: 'b ref -> body: 'a result -> ('b -> 'a) result
val app: func:('a -> 'b) result -> arg: 'a result -> 'b result
val var: 'a ref -> 'a result
val const: 'a -> 'a result
end
module Fold(F: FOLD_CALC) = struct
let fold c =
let rec loop: 'a. 'a calc -> 'a F.result =
fun (type a) (calc: a calc) ->
match calc with
| Lambda {bind; body} -> (F.lambda ~bind ~body:(loop body) : a F.result)
| App {func; arg} -> F.app ~func:(loop func) ~arg:(loop arg)
| Var v -> F.var v
| Const c -> F.const c in
loop c
end
(* [@@@end] *)
(* END of auto-generated code *)
module Eval: FOLD_CALC = struct
type 'a result = 'a
let lambda ~bind ~body = fun x -> bind := x; body
let app ~func ~arg = func arg
let var v = !v
let const c = c
end
module EvalComp = Fold(Eval)
module PrintApTree: FOLD_CALC = struct
type 'a result = string
let lambda ~bind:_ ~body = "(fun <some var> -> "^body^")"
let app ~func ~arg = "("^func^" "^arg^")"
let var _ = "<some var>"
let const _ = "<some const>"
end
module PrintApTreeComp = Fold(PrintApTree)
Ah I know what you mean, multiple types together getting the signature generated. As in:
type 'a calc = [...]
and 'b arith = [...]
There’s nothing extra interesting about multiple types, except making the task more cumbersome, because now there might be a calc_result type and an arith_result type… maybe…
type _ calc =
| Lambda: {bind: 'b ref; body: 'a calc} -> ('b -> 'a) calc
| App: {func: ('a -> 'b) calc; arg: 'a calc} -> 'b calc
| Var: 'a ref -> 'a calc
| Comp: 'a arith -> 'a calc
and _ arith =
| Add_int: int arith * int arith -> int arith
| Add_float: float arith * float arith -> float arith
| Const: 'a -> 'a arith
(* [@@deriving_inline fold_sig] *)
(* START of auto-generated code *)
module type FOLD_CALC = sig
type 'a calc_result
type 'a arith_result
val lambda: bind: 'b ref -> body: 'a calc_result -> ('b -> 'a) calc_result
val app: func:('a -> 'b) calc_result -> arg: 'a calc_result -> 'b calc_result
val var: 'a ref -> 'a calc_result
val comp: 'a arith_result -> 'a calc_result
val add_int: int arith_result -> int arith_result -> int arith_result
val add_float: float arith_result -> float arith_result -> float arith_result
val const: 'a -> 'a arith_result
end
module Fold(F: FOLD_CALC) = struct
let fold c =
let rec calc_loop: 'a. 'a calc -> 'a F.calc_result =
fun (type a) (calc: a calc) ->
match calc with
| Lambda {bind; body} -> (F.lambda ~bind ~body:(calc_loop body) : a F.calc_result)
| App {func; arg} -> F.app ~func:(calc_loop func) ~arg:(calc_loop arg)
| Var v -> F.var v
| Comp c -> F.comp (arith_loop c)
and arith_loop: 'a. 'a arith -> 'a F.arith_result =
fun (type a) (arith: a arith) ->
match arith with
| Add_int (a1, a2) -> (F.add_int (arith_loop a1) (arith_loop a2): a F.arith_result)
| Add_float (a1, a2) -> F.add_float (arith_loop a1) (arith_loop a2)
| Const c -> F.const c in
calc_loop c
end
(* [@@@end] *)
(* END of auto-generated code *)
module Eval: FOLD_CALC = struct
type 'a calc_result = 'a
type 'a arith_result = 'a
let lambda ~bind ~body = fun x -> bind := x; body
let app ~func ~arg = func arg
let var v = !v
let comp c = c
let add_int i1 i2 = Int.add i1 i2
let add_float i1 i2 = Float.add i1 i2
let const c = c
end
module EvalComp = Fold(Eval)
module PrintApTree: FOLD_CALC = struct
type 'a calc_result = string
type 'a arith_result = int
let lambda ~bind:_ ~body = "(fun <some var> -> "^body^")"
let app ~func ~arg = "("^func^" "^arg^")"
let var _ = "<some var>"
let comp c = "(added "^Int.to_string c^" elements)"
let add_int i1 i2 = i1 + i2
let add_float i1 i2 = i1 + i2
let const _ = 1
end
module PrintApTreeComp = Fold(PrintApTree)
I wonder: what’s the difference between this, and ppx.deriving.fold ? I will confess that I didn’t test whether ppx.deriving.fold can handle more than one type, but at least for a single type, isn’t it about the same (only, without a signature/functor – just a plain old function) ?
Assuming that by ppx.deriving.fold you mean ppx_variants_conv.Variants.fold.
Let’s look at the README example.
module Variant = struct
type 'constructor t = {
name : string;
rank : int;
constructor : 'constructor
}
end
type 'a t =
| A of 'a
| B of char
| C
| D of int * int
[@@deriving variants]
val fold :
init: 'b
-> a:('b -> ('a -> 'a t) Variant.t -> 'c)
-> b:('c -> (char -> 'a t) Variant.t -> 'd)
-> c:('d -> ('a t) Variant.t -> 'e)
-> d:('e -> (int -> int -> 'a t) Variant.t -> 'f)
-> 'f
The corresponding less-boilerplate signature is:
sig
type b
type c
type d
type e
type f
val a: b -> ('a -> 'a t) -> c
val b: c -> (char -> 'a t) -> d
val c: d -> ('a t) -> e
val d: e -> (int -> int -> 'a t) -> f
end
The functions a, b, c, d do not have access to the content of any information in t. Nay, the fold function itself does not take a value of type t. In other words, Variants.fold is not a fold at all. Folding means destructuring a data structure. Rather, it should be called Variants.with_variant_constructors – its use is to facilitate constructing the data structure.
Oh, no, I meant ppx_deriving.fold. It’s part of the package ppx_deriving, IIRC. I"ve never heard of this Variants.fold to which you refer. Here’s an example (from the unit-test):
type 'a btree =
| Node of 'a btree * 'a * 'a btree
| Leaf [@@deriving fold]
let rec fold_btree poly_a acc =
function
| Node (a0, a1, a2) ->
let acc = let acc = (fold_btree poly_a) acc a0 in poly_a acc a1 in
(fold_btree poly_a) acc a2
| Leaf -> acc[@@ocaml.warning "-39"]
For clarity: the fold_btree function was generated. The actual source-code is just the type with the annotaiton.
Ah, indeed! I haven’t had ppx_deriving installed but I had noticed it during googling and forgot about it. It is a proper “fold”, but maybe it doesn’t have all the functionality? (Converting record fields to labels and handling two types – for the use case I’m working on.) And also, signatures are more convenient, for example it’s easier to implement plugins. And moreover:
File "lib/dune", line 10, characters 18-30:
10 | (preprocess (pps ppx_deriving ppx_jane))
^^^^^^^^^^^^
Error: Ppx dependency on a non-ppx library "ppx_deriving". If "ppx_deriving"
is in fact a ppx rewriter library, it should have (kind ppx_rewriter) in its
dune file.
OK, I finally took a close look at the code from ppx_deriving.fold, and it … doesn’t do what you want. Instead it folds over the leaves, and only the leaves. I’ll think about this; let me get back to you. This isn’t a hard problem.
It’s not conceptually hard but I’m now inclined to think it’s OK to write this code by hand, the exhaustiveness check on fold enforces correctness.
As i was thinking over your problem last night, I had a thought. Maybe this is wrong, but maybe not. I think what you’re asking for is actually not a fold at all, but rather the induction combinator. Let me explain:
Suppose we take the type:
type t = O | S t
then what’s your “fold” look like?
module type FOLD_T = sig
type result
val o : result
val s : result → result
end
module Fold(F :FOLD_T)) = struct
let fold t =
let rec t_loop : t → F.result =
fun (t : t) →
match t with
O → F.o
| S t → F.s (t_loop t)
in t_loop t
end
that’s the induction principle on the type t, right there. Let’s write it without the functor:
t_induct :
res.
o:res
s:(res → res)
t → res
and it is implicit in the match on the type t: you recover the induction combinator by combining match with recursion, and applying the recursive function to all recursive instances.
So let’s do type 'a btree = Leaf | Node of btree * 'a * btree
let bt_induct leaf node =
let rec btrec = function
Leaf → leaf
| Node(l,v,r) → node (btrec l) v (btrec r)
in btrec
What’s the type of this?
b. b → ('a btree → 'a → 'a btree → b) → 'a -btree → b
To sum up: I think there’s little value in writing a PPX rewriter to produce this: you already have it in the match construct in ML – that’s sort of the point of match.
Could your problem be solved by the visitors library? (manual: http://gallium.inria.fr/~fpottier/visitors/manual.pdf)
IIRC it provides a ppx_deriving plugin that generates classes following the “visitor” pattern that you can use to traverse and fold over your datatype in various ways.