Parse tree structure with Angstrom

Hello.

I need a help with my solution below. My code works but I think it is kinda bloated and there might be an easier solution with Angstrom usage or list/stack solution. Please help me if there are easier/simpler solutions.

Problem

I would like to parse a text file below with Angstrom and the type definition specified.

File structure

It’s similar to file/folder or yaml structure (line by line). Whenever you have increased indent there will be new Node otherwise it will be a Leaf.

1
  1.1
  1.2
    1.2.1
      1.2.1.1
    1.2.2
2
  2.1

Type definition

type t =
  | Leaf of string
  | Node of (string * t list)

My solution

  • First I am reading file line by line with angstrom and then I extract lines to (level, name) list. For the tree above it becomes:
[(1, "1"); (2, "1.1"); (2, "1.2"); (3, "1.2.1"); (4, "1.2.1.1"); (3, "1.2.2"); (1, "2"); (2, "2.1")]
  • Then I loop in that list. Whenever level increased I push it to the Stack. And whenever the level decreased I pull items from the stack and generate list for that level. And I do this operation recursively.

Code

open Angstrom

type t =
  | Leaf of string
  | Node of (string * t list) [@@deriving show]

let is_space = function
  | '\x20'
  | '\x09' -> true
  | _ -> false
let is_eol = function
  | '\r'
  | '\n' -> true
  | _ -> false
let is_token = function
  | '\000' .. '\031'
  | '\127' -> false
  | _ -> true

let level = take_while1 is_space >>= fun s -> return @@ (String.length s) / 2 + 1
let token = take_while1 is_token <* skip_while is_eol
let pair x y = (x, y)

let lex =
  fix (fun _expr ->
    many (lift2 pair (level <|> return 1) token)
  )

let create_node name list = Node (name, list)
let create_leaf name = Leaf name

let rec pop_stack list stack until level =
  match Stack.is_empty stack with
  | true -> (list, stack)
  | false ->
    let ((level', name') as item', list') = Stack.pop stack in
    match until = level' with
    | true -> (Stack.push (item', list) stack); (list, stack)
    | false ->
        if level' < level then
          let node = create_node name' list in
          pop_stack [node] stack until level'
        else
          if List.length list' > 0 then
            let node = create_node name' list' in
            pop_stack (node :: list) stack until level'
          else
            let leaf = create_leaf name' in
            pop_stack (leaf :: list) stack until level'

let convert_to_tree list =
  let rec aux acc stack = function
    | [] -> acc
    | ((level, _) as i) :: ((level', _) as j) :: t ->
      if level' < level then
        begin
          Stack.push (i, []) stack;
          let (list', stack') = pop_stack [] stack level' level in
          aux list' stack' (j :: t)
        end
      else
        begin
          Stack.push (i, []) stack;
          aux acc stack (j :: t)
        end
    | ((level, _) as i) :: [] ->
      (Stack.push (i, []) stack);
      let (list', _) = pop_stack [] stack 0 level in
      aux list' stack []
  in
  aux [] (Stack.create ()) list

let parse file_name =
  let channel = open_in file_name in
  in_channel_length channel
  |> really_input_string channel
  |> parse_string ~consume:Consume.Prefix lex
  |> function
    | Ok list -> List.iter (fun i ->
      print_endline @@ show i) (convert_to_tree list)
    | _ -> ()

Output

[
	Node ("1", [
		Leaf "1.1"
		;Node ("1.2", [
			Node ("1.2.1", [
				Leaf "1.2.1.1"
			])
			;Leaf "1.2.2"
		])
	])
	;Node ("2", [
		Leaf "2.1"
	])
]