Binary encoding using Stdint

Hello!

I am playing around with some binary protocols and would like a nice interface for decoding integers of various sizes using Angstrom (https://github.com/inhabitedtype/angstrom), and for encoding them as well. For this I am using the stdint library (https://github.com/andrenth/ocaml-stdint) and extending it with a functor. This works very nicely for decoding; for encoding, however, I am observing unexpected behavior and need some help figuring out what is going on.
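For example, decoding a little-endian uint16 behaves as I'd expect (this uses the Uint16 module defined below; recent Angstrom releases require the ~consume label on parse_string):

let () =
  match
    Angstrom.parse_string ~consume:Angstrom.Consume.All
      Uint16.decoder_little_endian "\x2a\x00"
  with
  | Ok v -> print_endline (Uint16.to_string v) (* prints 42 *)
  | Error msg -> failwith msg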

Following is the extension to stdint with a runnable example (requires angstrom, stdint and hex):

(* Build an Angstrom parser that reads [bits / 8] bytes and converts them
   to an integer with [of_bytes] (e.g. [of_bytes_little_endian]). *)
let make_decoder bits of_bytes =
  let open Angstrom in
  let byte_length = bits / 8 in
  count byte_length any_char >>= fun char_list ->
  of_bytes
    (Bytes.init byte_length (List.nth char_list))
    0
  |> return


(* Build an encoder that writes a value into a [bits / 8]-byte buffer
   with [to_bytes] (e.g. [to_bytes_little_endian]) and returns the buffer. *)
let make_encode bits to_bytes =
  let byte_length = bits / 8 in
  let bytes = Bytes.create byte_length in
  fun v ->
    to_bytes v bytes 0;
    bytes

(* Add Angstrom decoders and bytes encoders to any stdint integer module. *)
module Extend (A : Stdint.Int) = struct
  include A

  let decoder_little_endian =
    make_decoder A.bits A.of_bytes_little_endian

  let decoder_big_endian =
    make_decoder A.bits A.of_bytes_big_endian

  let encode_little_endian =
    make_encode A.bits A.to_bytes_little_endian

  let encode_big_endian =
    make_encode A.bits A.to_bytes_big_endian
end

module Int8 = Extend(Stdint.Int8)
type int8 = Int8.t

module Uint8 = Extend(Stdint.Uint8)
type uint8 = Uint8.t

module Int16 = Extend(Stdint.Int16)
type int16 = Int16.t

module Uint16 = Extend(Stdint.Uint16)
type uint16 = Uint16.t

module Uint32 = Extend(Stdint.Uint32)
type uint32 = Uint32.t

let () =
  let n = 42 in
  let hexdump b = b |> Bytes.to_string |> Hex.of_string |> Hex.hexdump in
  n |> Int16.of_int |> Int16.encode_little_endian |> hexdump;
  n + 1 |> Int16.of_int |> Int16.encode_little_endian |> hexdump;
  [ n; n + 1 ]
  |> List.map (fun x -> x |> Int16.of_int |> Int16.encode_little_endian)
  |> Bytes.concat (Bytes.create 0)
  |> hexdump

The program outputs:

00000000: 2a00                                     *
00000000: 2b00                                     +
00000000: 2b00 2b00                                ++

Here [n; n + 1] incorrectly encodes to 2b00 2b00, even though n on its own encodes to 2a00 and n + 1 to 2b00.

I would expect:

00000000: 2a00                                     *
00000000: 2b00                                     +
00000000: 2a00 2b00                                *+

So encoding n and n + 1 separately is fine, but doing it together in List.map duplicates the last value. This looks very side-effecty and I can't figure out how or why.

In make_encode you allocate a single buffer outside the returned closure, so every call to the returned function writes into, and returns, that same buffer. Your List.map therefore produces a list containing the same bytes value twice, and by the time you print it, the buffer holds the last encoded value.
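You can see the same aliasing without stdint or Angstrom at all; a minimal sketch:

let () =
  let buf = Bytes.create 1 in
  let encode c = Bytes.set buf 0 c; buf in
  let a = encode 'a' in
  let b = encode 'b' in
  (* [a] and [b] are the same buffer, which now holds 'b'. *)
  Printf.printf "%s%s\n" (Bytes.to_string a) (Bytes.to_string b)
  (* prints "bb", not "ab" *)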

This change fixes it:

 let make_encode bits to_bytes =
   let byte_length = bits / 8 in
-  let bytes = Bytes.create byte_length in
   fun v ->
+    let bytes = Bytes.create byte_length in
     to_bytes v bytes 0;
     bytes
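For reference, the fixed function in full; each call now returns a fresh buffer, so the List.map example prints the expected 2a00 2b00:

let make_encode bits to_bytes =
  let byte_length = bits / 8 in
  fun v ->
    (* Allocate a fresh buffer per call so the results never alias. *)
    let bytes = Bytes.create byte_length in
    to_bytes v bytes 0;
    bytes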

Beautiful. Thank you very much!