type 'a tree =
| Node of 'a tree * 'a tree
| Leaf of 'a list
How can I build a lazy list (so, a stream or a seq, I think) that enumerates all the values in a tree (that is, contains all the values in the lists at the Leaf nodes, order not important)? Performance is important for my application (that’s why I’m using lazy streams rather than e.g. concatenating all lists). What’s the “right” way to do this? I’ve never used lazy lists before so I’m a bit lost.
PS: When I say lazy I just mean “delayed”, not “ocaml lazy” (that is, no memoisation needed).
how about with streams? I always camlp5’s streams syntax, and thus have forgotten if it’s supported in straight-up ocaml but it’s pretty easy [n.b. I didn’t actually compile/execute any of the snippets below] [n.b. #2: please note the single-quotes, which are syntactically significant and mark stream-element patterns, instead of stream-patterns]:
let rec flat = function
Node (a,b) -> [< flat a ; flat b >]
| Leaf l -> [< 'l >]
yields 'a list Stream.t. But you probably want 'a Stream.t, right? In which case, that last line becomes
| Leaf l -> stream_of_list l
let stream_of_list l =
let rec srec = function [] -> [< >] | h::t -> [< 'h ; srec t >]
in srec l
I assume you’re going to want to consume the stream at some point, so
let consume f strm =
let rec crec = parser
[< >] -> ()
| [< 'x ; strm >] -> f x ; crec strm
in crec strm
and obviously fold_left is just as straightforward.
Thanks, that seems reasonable (though I’m not convinced on the performance front), but unfortunattely it’s not an option for me since we can’t use caml4p/caml5p on our codebase.
Oh, performance-wise (since it macro-expands into a bunch of closures and such) I doubt it can be faster than just coding up something yourself that iteratively flattens that tree by rotating clockwise at the root until a leaf appears at the left-child, and then extracting a single value each time.
Maybe something like this? [n.b. I actually compiled this and tested it (once)]
let extract1 t =
let rec erec = function
| Leaf [] -> failwith "empty"
| Leaf l -> erec(Node(Leaf l, Leaf[]))
| Node(Node(a,b),c) -> erec (Node(a,Node(b,c)))
| Node(Leaf[],b) -> erec b
| Node(Leaf(h::t),b) -> h,Node(Leaf t,b)
in erec t
Ah, nice. Yes, with “flat_map” it’s rather straightforward. I can still prefer the syntactic loveliness of streams & parsers, but hey, de gustibus and all that.
If performance is tight, you can try to implement a manual conversion to Seq.t using a stack to store subtrees to explore. Example (with a custom stack to handle both leaves and internal nodes)
type 'a tree =
| Node of 'a tree * 'a tree
| Leaf of 'a list
type 'a stack =
| S_nil
| S_yield of 'a list * 'a stack
| S_sub of 'a tree * 'a stack
let to_seq (t:_ tree) : _ Seq.t =
let rec aux stack () =
match stack with
| S_nil -> Seq.Nil
| S_yield ([], stack') -> aux stack' ()
| S_yield (x :: l, stack') -> Seq.Cons (x, aux (S_yield (l, stack')))
| S_sub (t, stack') ->
match t with
| Leaf [] -> aux stack' ()
| Leaf (t1::l) -> Seq.Cons (t1, aux (S_yield (l, stack')))
| Node (t1, t2) -> aux (S_sub (t1, S_sub (t2, stack'))) ()
in aux (S_sub (t, S_nil))
let rec walk tree remaining k =
match tree, remaining with
| Node (left, right), _ -> walk left (right :: remaining) k
| Leaf (x :: xs), _ -> k x; walk (Leaf xs) remaining k
| Leaf [], tree' :: remaining' -> walk tree' remaining' k
| Leaf [], [] -> ()
And you call it on your tree with walk tree [] k. Here is a sketch using ints for simplicity
I’m a bit confused why everyone is proposing such complicated/overkill/deprecated solutions for such a simple problem.
So here are solutions for the two standard modern “lazy iterators” in OCaml.
(** Turn a tree into a Seq.t, Uses oseq for convenience.
Note the explicit unit for manual control of lazyness *)
let rec seq_of_tree
: 'a tree -> 'a Seq.t
= fun t () -> match t with
| Leaf x -> OSeq.return x ()
| Node (l,r) -> OSeq.append (seq_of_tree l) (seq_of_tree r) ()
(** Turn a tree into an Iter.t
literally equal to an iter function with argument flipped *)
let rec iter_of_tree
: 'a tree -> 'a Iter.t
= fun t k -> match t with
| Leaf x -> k x
| Node (l,r) -> iter_of_tree l k ; iter_of_tree r k