Infinite loop when I try to save and re-throw an exception using caml_raise

I’m working with bindings for a C library (FreeTDS) that does error handling in callbacks, and I want to handle errors without blowing up FreeTDS’s current stack whenever on occurs, but for some reason when I call caml_raise, my code blocks forever in what seems to be an infinite loop. I’m guessing I’m doing something wrong with CAMLlocal1 or how I’m calling caml_raise, or maybe the way I’m preventing the exception from being GC’d.

The code I have now looks like this:

typedef struct User_data {
  value latest_exception;
} User_data;

static void userdata_free(DBPROCESS* proc)
{
  User_data* data = (User_data*)dbgetuserdata(proc);
  if (data == NULL)
    return;

  caml_remove_global_root(&(data->latest_exception));

  dbsetuserdata(proc, NULL);
  caml_stat_free(data);
}

static void userdata_setup(DBPROCESS* proc)
{
  userdata_free(proc);

  User_data* data = caml_stat_alloc(sizeof(User_data));
  data->latest_exception = Val_unit;
  caml_register_global_root(&(data->latest_exception));
  dbsetuserdata(proc, (BYTE*)data);
}

static void userdata_set_latest_exception(DBPROCESS *proc, value exn)
{
  CAMLparam1(exn);
  User_data* data = (User_data*)dbgetuserdata(proc);
  if(data == NULL)
    caml_raise(exn);

  data->latest_exception = exn;
  CAMLreturn0;
}

static void maybe_raise_userdata_exn(DBPROCESS* proc)
{
  CAMLparam0();
  CAMLlocal1(vexn);

  User_data* data = (User_data*)dbgetuserdata(proc);
  if(data == NULL || data->latest_exception == Val_unit)
    CAMLreturn0;

  vexn = data->latest_exception;
  data->latest_exception = Val_unit;
  caml_raise(vexn);
}

But if I actually raise an exception, I get an infinite loop in caml_raise:

(gdb) bt
#0  0x0000000000b835b3 in caml_raise (v=140737336655744) at fail.c:72
#1  0x0000000000b6f5ea in maybe_raise_userdata_exn (proc=proc@entry=0x7fffdc0090c0) at dblib_stubs.c:100
#2  0x0000000000b70113 in ocaml_freetds_dbresults (vdbproc=<optimized out>) at dblib_stubs.c:487
#3  0x0000000000682774 in camlMssql__Client__result_set_loop_9138 () at mssql/src/client.ml:58
#4  0x0000000000682668 in camlMssql__Client__run_query_8733 () at mssql/src/client.ml:85
#5  0x0000000000681aad in camlMssql__Mssql_error__with_wrap_inner_9163 () at mssql/src/mssql_error.ml:43
#6  0x0000000000a8f175 in camlBase__Result__try_with_2876 () at src/result.ml:161
#7  0x00000000006f3f22 in camlAsync_unix__In_thread__doit_2854 () at src/in_thread.ml:12
#8  0x00000000006deca2 in camlAsync_unix__Thread_pool__loop_49809 () at src/thread_pool.ml:397
#9  0x000000000084d536 in camlCore__Core_thread__f_3838 () at src/core_thread.ml:12
#10 0x00000000008a4999 in camlThread__fun_1571 () at thread.ml:39
#11 0x0000000000b9cfa2 in caml_start_program ()
#12 0x0000000000b79620 in caml_thread_start ()
#13 0x00007ffff7e3558e in start_thread () from /lib64/libpthread.so.0
#14 0x00007ffff7bce6a3 in clone () from /lib64/libc.so.6

Relevant code is here in the version of OCaml I’m using:

void caml_raise(value v)
{
  Unlock_exn();
  if (caml_exception_pointer == NULL) caml_fatal_uncaught_exception(v);

#ifndef Stack_grows_upwards
#define PUSHED_AFTER <
#else
#define PUSHED_AFTER >
#endif
  while (caml_local_roots != NULL &&
         (char *) caml_local_roots PUSHED_AFTER caml_exception_pointer) {
    caml_local_roots = caml_local_roots->next; // line 72
  }
#undef PUSHED_AFTER

  caml_raise_exception(v);
}

If I change the code to call caml_raise_exception instead of caml_raise, this doesn’t block, but I do get random segfaults later on caml_do_local_roots (which isn’t too surprising since caml_raise_exception is a private function and presumably this code that’s blocking is doing something important).

Does anyone know what I should be doing here?

If I print caml_local_roots in gdb, it looks like there’s a very small cycle, where caml_local_roots == caml_local_roots->next. Is this something my code is somehow messing up? I don’t understand why anything I did would have caused that.

(gdb) print caml_local_roots
$2 = (struct caml__roots_block *) 0x7fffe56ea5d0
(gdb) print *caml_local_roots
$3 = {next = 0x7fffe56ea5d0, ntables = 4, nitems = 1, tables = {0x7fffe56ea5b0, 0x7fffe56ea5b8, 0x7fffe56ea5c0, 0x7fffe56ea5c8, 0x7fffc4000020}}

Also I can confirm that this code works if I compile it in byte mode, just not in native mode.

That’s… suspicious.

It might help to have a driver that shows the problem. I put together a driver and I don’t see the error you describe.

I added these lines at the beginning of your C code:

#include <caml/mlvalues.h>
#include <caml/memory.h>
#include <caml/fail.h>

typedef unsigned char BYTE;

typedef struct dbprocess {
    void *user_data;
} DBPROCESS;

void *dbgetuserdata(DBPROCESS *proc)
{
    return proc->user_data;
}

void dbsetuserdata(DBPROCESS *proc, void *data)
{
    proc->user_data = data;
}

Then I added these lines after your C code:

static DBPROCESS g_proc = { NULL };

value setup(value unitval)
{
    CAMLparam1(unitval);
    userdata_setup(&g_proc);
    CAMLreturn(Val_unit);
}

value set_latest_exception(value exn)
{
    CAMLparam1(exn);
    userdata_set_latest_exception(&g_proc, exn);
    CAMLreturn(Val_unit);
}

value raise_exn(value unitval)
{
    CAMLparam1(unitval);
    maybe_raise_userdata_exn(&g_proc);
    CAMLreturn(Val_unit);
}

My ocaml driver looks like this:

$ cat try.ml
external setup : unit -> unit = "setup"
external set_latest_exception : exn -> unit = "set_latest_exception"
external raise_exn : unit -> unit = "raise_exn"

exception Local_exn of int * string

let main () =
    setup ();
    set_latest_exception (Local_exn (99, "failure message"));
    raise_exn ()

let () = main ()

When I run it I see this:

$ ./try
Fatal error: exception Try.Local_exn(99, "failure message")
$

No doubt this is because your code doesn’t look like my simple driver.

It might also be interesting to know if the behavior seems completely deterministic or if it varies.

After thinking about it more this weekend, I think this code is probably doing something wrong in the multithreading sections (not shown). I bet we’re messing with the ocaml variable list while the runtime lock is released. I’m going to try to fix that and I’ll check back here with a bigger example if that doesn’t work.

Ok, sorry to waste your time. This was definitely caused by calling CAMLparam, CAMLlocal, or CAMLreturn when the runtime lock was released.

A side question though: Is there a way to do the equivalent of CAMLreturn without immediately returning? I have an error callback that needs to handle OCaml locals but has to release the runtime lock before returning. I solved it by turning it into two functions (one calls the other function and then releases the runtime lock) but I’m curious if there’s a lower-level function to “do what CAMLreturn does but don’t return”.

In memory.h there is a definition CAMLdrop that does this.

1 Like

Thanks, that made changing my code to do this correctly a lot simpler!