Parsing simple recursive expressions with Angstrom

Hello. I’m trying to write a parser for a simple language with AST definition as follows

type term =
  | TmTrue
  | TmFalse
  | TmIf of term * term * term
  | TmZero

I’m struggling to write the parser for if expressions. An example if expression is like

if false then true else false; 

What I’ve got so far

open Angstrom
let ws =
  skip_while (function '\x20' | '\x0a' | '\x0d' | '\x09' -> true | _ -> false)

let lchar c = ws *> char c
let lrchar c = ws *> char c <* ws
let lrstring s = ws *> string s <* ws
let _true = lrstring "true" *> return TmTrue
let _false = lrstring "false" *> return TmFalse
let zero = lrchar '0' *> return TmZero

let expr =
  fix (fun expr ->
      let ifelse =
        lrstring "if" *> expr >>= fun cond ->
        lrstring "then" *> expr >>= fun thenexpr ->
        lrstring "else" *> expr >>| fun elseexpr ->
        return (TmIf (cond, thenexpr, elseexpr))
      choice [ _true; _false; zero; ifelse ])

However, this gives me a type error saying that the ifelse function has type term t t, but expected term t. I don’t get how the parser I’ve wrote has that type, if anyone could explain, I’d be really glad.

I also tried to use recursive function, however it gave “this kind of expression is not allowed in let rec”:

let rec ifelse =
  lrstring "if" *> expr >>= fun cond ->
  lrstring "then" *> expr >>= fun then_exp ->
  lrstring "else" *> expr >>= fun else_exp ->
  return (TmIf (cond, then_exp, else_exp))

and expr = choice [ _true; _false; zero; ifelse ]


>>| is 'a t -> ('a -> 'b) -> 'b t and return is 'a -> 'a t so >>| fun x -> return xxx is indeed of type term t t.

Removing the return should fix your type checking issue. (or alternatively switching from >>| to >>=)

I think the let syntax is a good fit for Angstrom parsing, so I would personally write it as:

let expr =
  let open Angstrom.Let_syntax in
  fix (fun expr ->
      let ifelse =
        let* cond = lrstring "if" *> expr in
        let* thenexpr = lrstring "then" *> expr in
        let+ elseexpr = lrstring "else" *> expr in
        (TmIf (cond, thenexpr, elseexpr))
      choice [ _true; _false; zero; ifelse ]);;
1 Like

Thanks, both solutions worked pretty well!

Also possible to have a more applicative style:

let+ cond = lrstring "if" *> expr
and+ thenexpr = lrstring "then" *> expr
and+ elseexpr = lrstring "else" *> expr in
TmIf (cond, thenexpr,...
1 Like