To clarify some more,
%token <token list> DOCBLOCK
ends up generating code of the form:
type token = ...
| DOCBLOCK of token list
i.e., the token itself can carry some additional information, usually supplied by the lexer; however, this data is opaque to the parser.
Typically, arguments to tokens are used for things like identifiers or literals - here’s an example from a .mly
file I had lying around:
(* parser.mly *)
(* Tokens declared with a <type> carry a payload of that OCaml type;
   the lexer supplies the value when it builds the token. *)
%token <int> INT
%token <float> FLOAT
%token <bool> BOOL
(* AND is declared without a type, so it carries no payload. *)
%token AND
...
(* In each production, [x = TOKEN] binds the token's payload to [x] so the
   semantic action in braces can use it. [mkloc] is a helper that is not
   shown in this excerpt; presumably it attaches the source location [$loc]
   to the value -- confirm against its definition. *)
expr:
| b = BOOL { Exp_bool (mkloc ~loc:$loc b) }
| i = INT { Exp_number (mkloc ~loc:$loc (Int i)) }
| f = FLOAT { Exp_number (mkloc ~loc:$loc (Float f)) }
(* Both operands are matched directly as BOOL tokens, so [l1] and [l2] are
   plain [bool] payloads rather than sub-expressions. *)
| l1 = BOOL AND l2 = BOOL { Exp_and (l1, l2) }
...
;;
with a lexer which would populate the arguments to the tokens as follows:
(* lexer.mll *)

(* A single decimal digit. *)
let digit = ['0' - '9']

(* A digit or an underscore, so that literals such as 1_000 are accepted. *)
let digit_char = ['0' - '9' '_']

(* One digit followed by any number of digits/underscores. *)
let integral_number = digit digit_char*

(* An integral part, an optional fractional part and an optional exponent. *)
let number = integral_number ('.' digit_char* )? (['e' 'E'] ['+' '-']? integral_number)?

(* [token lexbuf] returns the next parser token read from [lexbuf].
   Tokens declared with a payload in the grammar (BOOL, INT, FLOAT) are
   built here with the value they carry. [newline], [blank] and
   [update_loc] are not defined in this excerpt and must be in scope. *)
rule token = parse
  | newline { update_loc lexbuf 1 false 0; token lexbuf }
  | blank+ { token lexbuf }
  | "true" { BOOL true }
  | "false" { BOOL false }
  | number {
      (* Try the lexeme as an integer first; anything that is not a valid
         int (fractional part, exponent, out of range) becomes a float. *)
      let lexeme = Lexing.lexeme lexbuf in
      match int_of_string_opt lexeme with
      | None -> FLOAT (float_of_string lexeme)
      | Some i -> INT i
    }
  (* ocamllex actions are written in braces, not after an arrow. *)
  | "&&" { AND }
and the AST:
(** Expression AST produced by the parser.

    NOTE(review): the parser excerpt builds [Exp_number] and wraps payloads
    with [mkloc], neither of which is declared here; this type is presumably
    a simplified version of the real one -- confirm before relying on it. *)
type ast =
| Exp_bool of bool (** A boolean value. *)
| Exp_int of int (** An integer value. *)
| Exp_float of float (** A floating-point value. *)
| Exp_and of bool * bool (** A pair of boolean operands. *)