Ocamllex refill / refill_handler example


#1

Does anyone know of a complete example using the refill feature of ocamllex? All I have found is a 4 year old proposal from @let-def which seems to be a little different from the current implementation.

The manual has a nice start but doesn’t cover how to populate the lexbuf (which is itself lightly documented).


An experimental, unofficial OCaml wiki
#2

So, after some digging around in stdlib’s Lexing,ml I came up with a pattern for a generalised monad solution simillar to the one in the manual. I have include the code below in case anyone is interested. The Lexutils is pretty much directly from Lexing.ml.

I suspect that the ocamllex refill feature needs some work as having to define the functor within a function seems a bit baroque

lex_test.ml

{
type token = EOL | INT of int | PLUS | EOF

open Lexing

module Make (M : sig
               type 'a t
               val return: 'a -> 'a t
               val bind: 'a t -> ('a -> 'b t) -> 'b t
               val fail : string -> 'a t

               val read : Bytes.t -> int -> int t
             end)
= struct

  let on_refill lexbuf =
    let buf = Bytes.create 512 in
    M.bind (M.read buf 512) (fun len ->
      Lexutils.fill_lexbuf buf len lexbuf;
      M.return ())

  let refill_handler k lexbuf =
      M.bind (on_refill lexbuf) (fun () -> k lexbuf)

}


refill {refill_handler}

rule token = parse
| [' ' '\t']
    { token lexbuf  }
| '\n'
    { M.return EOL }
| ['0'-'9']+ as i
    { M.return (INT (int_of_string i)) }
| '+'
    { M.return PLUS }
| eof
    { M.return EOF }
| _
    { M.fail "unexpected character" }

{
end

let create_lexxer ~reader =
  let module Lexxer = Make (struct
      type 'a t = ('a, string) result

      let return v = Ok v
      let bind a f =
        match a with
        | Error _ as err -> err
        | Ok a -> f a

      let fail s = Error s

      let read buf len =
        match reader buf len with
        | exception End_of_file -> return 0
        | exception Failure err -> fail err
        | len -> return len

    end)
  in
  fun lexbuf -> Lexxer.token lexbuf

let () =
  let lexbuf = Lexutils.create_lexbuf () in
  let reader buf len = input stdin buf 0 len in
  let lexxer = create_lexxer ~reader in
  let rec loop () =
    match lexxer lexbuf with
    | Ok EOL -> Printf.printf "EOL "; loop ()
    | Ok (INT i) -> Printf.printf "INT(%d) " i; loop ()
    | Ok PLUS -> Printf.printf "PLUS "; loop ()
    | Ok EOF -> Printf.printf "EOF "
    | Error err -> Printf.printf "\nERROR: %s\n" err
  in
  loop ()
}

lexutils.ml

open Lexing

let fill_lexbuf buf read_len lexbuf =
  let n = if read_len > 0 then read_len else (lexbuf.lex_eof_reached <- true; 0) in
  (* Current state of the buffer:
        <-------|---------------------|----------->
        |  junk |      valid data     |   junk    |
        ^       ^                     ^           ^
        0    start_pos             buffer_end    Bytes.length buffer
  *)
  if lexbuf.lex_buffer_len + n > Bytes.length lexbuf.lex_buffer then begin
    (* There is not enough space at the end of the buffer *)
    if lexbuf.lex_buffer_len - lexbuf.lex_start_pos + n <= Bytes.length lexbuf.lex_buffer
    then
      (* But there is enough space if we reclaim the junk at the beginning
         of the buffer *)
      Bytes.blit lexbuf.lex_buffer lexbuf.lex_start_pos
                  lexbuf.lex_buffer 0
                  (lexbuf.lex_buffer_len - lexbuf.lex_start_pos)
    else begin
      (* We must grow the buffer.  *)
      let newlen =
        min (max (2 * Bytes.length lexbuf.lex_buffer) n) Sys.max_string_length
      in
      if lexbuf.lex_buffer_len - lexbuf.lex_start_pos + n > newlen then
        failwith "Lexing.lex_refill: cannot grow buffer";
      let newbuf = Bytes.create newlen in
      (* Copy the valid data to the beginning of the new buffer *)
      Bytes.blit lexbuf.lex_buffer lexbuf.lex_start_pos
                  newbuf 0
                  (lexbuf.lex_buffer_len - lexbuf.lex_start_pos);
      lexbuf.lex_buffer <- newbuf
    end;
    (* Reallocation or not, we have shifted the data left by
       start_pos characters; update the positions *)
    let s = lexbuf.lex_start_pos in
    lexbuf.lex_abs_pos <- lexbuf.lex_abs_pos + s;
    lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - s;
    lexbuf.lex_start_pos <- 0;
    lexbuf.lex_last_pos <- lexbuf.lex_last_pos - s;
    lexbuf.lex_buffer_len <- lexbuf.lex_buffer_len - s ;
    let t = lexbuf.lex_mem in
    for i = 0 to Array.length t-1 do
      let v = t.(i) in
      if v >= 0 then
        t.(i) <- v-s
    done
  end;
  (* There is now enough space at the end of the buffer *)
  Bytes.blit buf 0 lexbuf.lex_buffer lexbuf.lex_buffer_len n;
  lexbuf.lex_buffer_len <- lexbuf.lex_buffer_len + n
;;

let zero_pos = {
  pos_fname = "";
  pos_lnum = 1;
  pos_bol = 0;
  pos_cnum = 0;
}
;;

let create_lexbuf () =
  {
    refill_buff = (fun _lexbuf -> ());
    lex_buffer = Bytes.create 1024;
    lex_buffer_len = 0;
    lex_abs_pos = 0;
    lex_start_pos = 0;
    lex_curr_pos = 0;
    lex_last_pos = 0;
    lex_last_action = 0;
    lex_mem = [||];
    lex_eof_reached = false;
    lex_start_p = zero_pos;
    lex_curr_p = zero_pos;
  }
;;