On POSIX systems file descriptors (FD) are eagerly re-used. As a consequence,
use-after-close of a file descriptor is often not detected and in fact
harmful because it will manipulate a different file:
(* ocamlc -I +unix unix.cma -o foo foo.ml *)
let write fd msg =
Unix.write fd (Bytes.unsafe_of_string msg) 0 (String.length msg) |> ignore
let main () =
let fd_a = Unix.openfile "a.txt" Unix.[O_CREAT; O_RDWR] 0o666 in
write fd_a "aaa";
Unix.close fd_a;
let _fd_b = Unix.openfile "b.txt" Unix.[O_CREAT; O_RDWR] 0o666 in
write fd_a "aaa"; (* use after close, this goes into b *)
Unix.close fd_a (* this closes b *)
let () = main ()
In the code above the write to fd_a
goes into file b.txt
because the FD for a.txt
was re-used.
Here is a hack that could help detect it and I would like to discuss if this could be or should be integrated into a library:
- Unix (Linux) represents an FD as 32 bit but OCaml on 64-bit architecture uses an
int
that has 63 available bits. - On open, encode both the FD as before and in addition the inode of the FD into the OCaml
int
. - On close, check that the FD still points to the same inode if inode information is present. Raise an exception otherwise.
- FFI code that passes an FD back to C should be safe because the cast back to an
int
would discard the inode information.
This passes the OCaml test suite except for the
- testsuite/tests/lib-unix/common/cloexec.ml
which uses Obj.magic
and makes assumptions that a Unix file descriptor is an integer in the OCaml representation. This new representation would likewise break other code that does this.
Below is some code as a proof of concept against OCaml 4.14.1.
commit ed3a7835817b1a281f0738ade3de91322a68dcb9
Author: Christian Lindig <christian.lindig@cloud.com>
Date: Fri Dec 22 16:34:45 2023 +0000
Encode inode into FD for integrity check
On POSIX systems file descriptors are eagerly re-used. As a consequence
use-after-close of a filedescritor is often not detected and in fact
harmful because it will manipulate a different file.
On Linux, a file descriptor is 32bit wide and is respresented in OCaml
as an int, which is 63 bits on a 64-bit architecture. We can use the
extra bits for integrity checks:
* on Unix.openfile, encode the file descriptor and the lsb of the inode
into the OCaml value.
* on Unix.close, check that the file descriptor still refers to the same
inode. Raise an exception if this not the case; don't check for
integrity if no inode bits are present.
This patch breaks code that uses Obj.magic and the assumption that a
file descriptor is an integer. The test OCaml test suite contains such a
case for
* testsuite/tests/lib-unix/common/cloexec.ml
This patch is also not suitable for 32-bit architectures.
Signed-off-by: Christian Lindig <christian.lindig@cloud.com>
diff --git a/otherlibs/unix/close.c b/otherlibs/unix/close.c
index 961b8cbff1..3dc3c4ef72 100644
--- a/otherlibs/unix/close.c
+++ b/otherlibs/unix/close.c
@@ -17,9 +17,31 @@
#include <caml/signals.h>
#include "unixsupport.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+
CAMLprim value unix_close(value fd)
{
int ret;
+ long ffd;
+
+ /* check integrity */
+ ffd = Long_val(fd);
+ if ((ffd & 0x0fffffff00000000) != 0) {
+ struct stat statbuf;
+ size_t inode;
+ int rc;
+ long cs;
+
+ caml_enter_blocking_section();
+ rc = fstat(Int_val(fd), &statbuf) ;
+ caml_leave_blocking_section();
+ if (rc == -1) uerror("fstat", Nothing);
+ inode = statbuf.st_ino;
+ cs = ((inode << 32) & 0x0fffffff00000000) | Long_val(fd);
+ if (cs != ffd) uerror("integrity", Nothing);
+ }
+
caml_enter_blocking_section();
ret = close(Int_val(fd));
caml_leave_blocking_section();
diff --git a/otherlibs/unix/open.c b/otherlibs/unix/open.c
index cae1ce0fd0..ad10bd631b 100644
--- a/otherlibs/unix/open.c
+++ b/otherlibs/unix/open.c
@@ -25,6 +25,9 @@
#endif
#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
#ifndef O_NONBLOCK
#define O_NONBLOCK O_NDELAY
#endif
@@ -60,6 +63,10 @@ CAMLprim value unix_open(value path, value flags, value perm)
CAMLparam3(path, flags, perm);
int fd, cv_flags, clo_flags, cloexec;
char * p;
+ struct stat statbuf;
+ size_t inode;
+ long ffd;
+ int rc;
caml_unix_check_path(path, "open");
cv_flags = caml_convert_flag_list(flags, open_flag_table);
@@ -83,5 +90,13 @@ CAMLprim value unix_open(value path, value flags, value perm)
#if !defined(O_CLOEXEC)
if (cloexec) unix_set_cloexec(fd, "open", path);
#endif
- CAMLreturn (Val_int(fd));
+ /* remember inode in msb */
+ caml_enter_blocking_section();
+ rc = fstat(fd, &statbuf);
+ caml_leave_blocking_section();
+ if (rc == -1) uerror("fstat", path);
+ inode = statbuf.st_ino;
+ ffd = ((inode << 32) & 0x0fffffff00000000) | (long) fd;
+
+ CAMLreturn (Val_int(ffd));
}
diff --git a/tmp/foo.ml b/tmp/foo.ml
new file mode 100644
index 0000000000..6ce5740104
--- /dev/null
+++ b/tmp/foo.ml
@@ -0,0 +1,15 @@
+(* ocamlc -I +unix unix.cma -o foo foo.ml *)
+
+let write fd msg =
+ Unix.write fd (Bytes.unsafe_of_string msg) 0 (String.length msg) |> ignore
+
+let main () =
+ let fd_a = Unix.openfile "a.txt" Unix.[O_CREAT; O_RDWR] 0o666 in
+ write fd_a "aaa";
+ Unix.close fd_a;
+
+ let _fd_b = Unix.openfile "b.txt" Unix.[O_CREAT; O_RDWR] 0o666 in
+ write fd_a "aaa"; (* use after close, this goes into b *)
+ Unix.close fd_a (* this closes b *)
+
+let () = main ()