As promised the working definition of a combinator parser based on the library
fmlib.
First the abstract syntax (which I have slightly modified from your post):
(* Abstract syntax *)
type expression =
| Literal of int
| Variable of string
| Plus of expression * expression
| Lambda of string list * statement list
and statement =
| Return of expression
| If of expression * statement list * statement list
| Assign of string * expression
and definition =
string * string list * statement list
(* We want to parse a definition, therefore the final type is a definition *)
module Final =
struct
type t = definition
end
Now the parser module
module Parse =
struct
include Fmlib_parse.Character.Make (Unit) (Final) (String)
module String_set = Set.Make (String)
let keywords: String_set.t =
String_set.(empty |> add "if" |> add "return" |> add "lambda")
let whitespace: int t =
skip_zero_or_more (char ' ' </> char '\n' </> char '\r')
let keyword (kw: string): string t =
let* str = backtrack (string kw) kw in
let* _ = whitespace in
return str
(* A raw identifier is a string starting with letter, followed by zero or
* more letters, digits or underscores. *)
let raw_name: string t =
let* first = letter in
zero_or_more
(String.make 1 first)
(fun c str -> str ^ String.make 1 c)
(letter </> range '0' '9' </> char '_')
(* An identifier is a raw identifier which is not a keyword. Whitespace is
* stripped. *)
let identifier: string t =
let* id =
backtrack
(
let* name = raw_name in
if String_set.mem name keywords then
unexpected "identifier"
else
return name
)
"identifier"
in
let* _ = whitespace in
return id
(* Digit sequence with whitespace stripped. *)
let number: expression t =
let* n =
one_or_more
Fun.id
(fun n d -> 10 * n + d)
digit
in
let* _ = whitespace in
return (Literal n)
(* [p1] and [p2] are the opening and closing parentheses. *)
let parenthesized (p1: char) (p2: char) (p: unit -> 'a t): 'a t =
let* _ = char p1 in
let* _ = whitespace in
let* a = p () in
let* _ = whitespace in
let* _ = char p2 in
let* _ = whitespace in
return a
(* '(' name1, name2, ... ')' *)
let formal_arguments: string list t =
let separator =
let* _ = char ',' in
whitespace
in
parenthesized
'('
')'
(fun () ->
let* lst =
one_or_more_separated
(fun first -> [first])
(fun lst _ nxt -> nxt :: lst)
identifier
separator
</>
return []
in
return (List.rev lst))
(* Now the mutual recursive definition of [expression], atomic expression,
* statement and statement list. *)
let rec expression (): expression t =
let plus =
let* _ = char '+' in
whitespace
in
one_or_more_separated
Fun.id
(fun e1 _ e2 -> Plus (e1, e2))
(atomic ())
plus
and atomic (): expression t =
number
</>
(
let* id = identifier in
return (Variable id)
)
</>
(
let* _ = keyword "lambda" in
let* fargs = formal_arguments in
let* stmts = statements () in
return (Lambda (fargs, stmts))
)
</>
parenthesized '(' ')' expression
and statement (): statement t =
(
let* _ = keyword "return" in
let* e = expression () in
return (Return e)
)
</>
(
let* _ = keyword "if" in
let* bexp = parenthesized '(' ')' expression in
let* stmts1 = statements () in
let* _ = keyword "else" in
let* stmts2 = statements ()
in
return (If (bexp, stmts1, stmts2))
)
</>
(
let* id = identifier in
let* _ = char '=' in
let* _ = whitespace in
let* expr = expression () in
return (Assign (id, expr))
)
and statements (): statement list t =
parenthesized
'{'
'}'
(fun () ->
let* statements =
one_or_more_separated
(fun stmt -> [stmt])
(fun stmts _ nxt -> nxt :: stmts)
(statement ())
(let* _ = char ';' in whitespace)
in
return (List.rev statements))
(* name (arg1, arg2, ... ) { body } *)
let definition: definition t =
let* id = identifier in
let* fargs = formal_arguments in
let* stmts = statements () in
return (id, fargs, stmts)
end
Finally an inline test which proves that the parser works as expected.
let%test _ =
let open Parse in
let p =
Parser.run_on_string
"f1 (a,b,c) { return (lambda (i,j) { a = 10; return i }) }"
(make () definition)
in
Printf.printf "pos %d, %d\n" (Parser.line p) (Parser.column p);
Parser.has_succeeded p