Is Unix.open_process_args_in broken?

I was playing around with the Unix module and open_process_args_in seems broken to me or am I holding it wrong?

I tried building the program below like so: ocamlopt unix.cmxa test.ml && chmod +x a.out && ./a.out, but only the output from the first execution of iostat is printed. Running it requires the sysstat package to be installed.

(* [printer i] copies every remaining line of the input channel [i] to
   standard output, one line each, and returns [()] once [input_line]
   raises [End_of_file].

   End of input is detected with [match ... with exception] rather than a
   surrounding [try]: the recursive call then sits outside any exception
   handler, so it is a genuine tail call and the stack does not grow with
   the number of lines read. Output still goes through [Printf.printf],
   so it is buffered exactly as before (no flush per line). *)
let rec printer i =
  match input_line i with
  | line ->
    Printf.printf "%s\n" line;
    printer i
  | exception End_of_file -> ()

(* Entry point: launches the same iostat invocation twice -- once with
   [Unix.open_process_in] and once with [Unix.open_process_args_in] -- and
   then prints each child's output in turn, with a marker line in between. *)
let () =
  (* One command string; per this thread, [open_process_in] has the shell
     interpret it. *)
  let i = Unix.open_process_in "iostat -dxy 1 1" in
  (* Program name and argument array are passed separately. The array holds
     only the options, so "-dxy" occupies slot 0.
     NOTE(review): the strace transcript in this thread shows this second
     child exiting with status 127 and its pipe yielding no data. *)
  let i2 = Unix.open_process_args_in "iostat" [| "-dxy"; "1"; "1" |] in
  flush_all ();

  (* Read the first pipe to end-of-file and close it before reading the
     second one. *)
  printer i;
  close_in i;
  Printf.printf "### SEPARATOR ###\n";
  printer i2;
  close_in i2

What do you see in strace? (or on BSD, “truss” ?)

❯ strace ./a.out
execve("./a.out", ["./a.out"], 0x7fffa5e96400 /* 86 vars */) = 0
brk(NULL)                               = 0x66e000
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffd700d73b0) = -1 EINVAL (Invalid argument)
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=155615, ...}) = 0
mmap(NULL, 155615, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f8f392c7000
close(3)                                = 0
openat(AT_FDCWD, "/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\363\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1920608, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8f392c5000
mmap(NULL, 1331216, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8f3917f000
mmap(0x7f8f3918e000, 638976, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xf000) = 0x7f8f3918e000
mmap(0x7f8f3922a000, 626688, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xab000) = 0x7f8f3922a000
mmap(0x7f8f392c3000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x143000) = 0x7f8f392c3000
close(3)                                = 0
openat(AT_FDCWD, "/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\"\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=37384, ...}) = 0
mmap(NULL, 24688, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8f39178000
mmap(0x7f8f3917a000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f8f3917a000
mmap(0x7f8f3917c000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f8f3917c000
mmap(0x7f8f3917d000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f8f3917d000
mmap(0x7f8f3917e000, 112, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f8f3917e000
close(3)                                = 0
openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \203\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0 \0\0\0\5\0\0\0GNU\0\1\0\0\300\4\0\0\0\330\1\0\0\0\0\0\0"..., 48, 848) = 48
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0001\6Asl]R\276X\363\351\357\243Y\3H"..., 68, 896) = 68
fstat(3, {st_mode=S_IFREG|0755, st_size=3222128, ...}) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 1876640, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8f38fad000
mprotect(0x7f8f38fd3000, 1683456, PROT_NONE) = 0
mmap(0x7f8f38fd3000, 1372160, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x26000) = 0x7f8f38fd3000
mmap(0x7f8f39122000, 307200, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x175000) = 0x7f8f39122000
mmap(0x7f8f3916e000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c0000) = 0x7f8f3916e000
mmap(0x7f8f39174000, 12960, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f8f39174000
close(3)                                = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8f38faa000
arch_prctl(ARCH_SET_FS, 0x7f8f38faa740) = 0
mprotect(0x7f8f3916e000, 12288, PROT_READ) = 0
mprotect(0x7f8f3917d000, 4096, PROT_READ) = 0
mprotect(0x7f8f392c3000, 4096, PROT_READ) = 0
mprotect(0x4a7000, 4096, PROT_READ)     = 0
mprotect(0x7f8f39318000, 4096, PROT_READ) = 0
munmap(0x7f8f392c7000, 155615)          = 0
brk(NULL)                               = 0x66e000
brk(0x68f000)                           = 0x68f000
brk(NULL)                               = 0x68f000
mmap(NULL, 2105344, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8f38da8000
mmap(NULL, 1024000, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8f38cae000
sigaltstack({ss_sp=0x4da3a0, ss_flags=0, ss_size=8192}, NULL) = 0
rt_sigaction(SIGSEGV, {sa_handler=0x4693b0, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_NODEFER|SA_SIGINFO, sa_restorer=0x7f8f38feaa60}, NULL, 8) = 0
readlink("/proc/self/exe", "/home/rbjorklin/a.out", 256) = 21
stat("/home/rbjorklin/a.out", {st_mode=S_IFREG|0755, st_size=2173176, ...}) = 0
brk(NULL)                               = 0x68f000
brk(0x6b9000)                           = 0x6b9000
lseek(0, 0, SEEK_CUR)                   = -1 ESPIPE (Illegal seek)
mmap(NULL, 794624, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8f38bec000
lseek(1, 0, SEEK_CUR)                   = -1 ESPIPE (Illegal seek)
lseek(2, 0, SEEK_CUR)                   = -1 ESPIPE (Illegal seek)
pipe2([3, 4], O_CLOEXEC)                = 0
fstat(3, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0
brk(NULL)                               = 0x6b9000
brk(0x6e9000)                           = 0x6e9000
lseek(3, 0, SEEK_CUR)                   = -1 ESPIPE (Illegal seek)
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f8f38faaa10) = 8879
close(4)                                = 0
pipe2([4, 5], O_CLOEXEC)                = 0
fstat(4, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0
lseek(4, 0, SEEK_CUR)                   = -1 ESPIPE (Illegal seek)
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f8f38faaa10) = 8880
close(5)                                = 0
read(3, 0x6b8e40, 65536)                = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=8880, si_uid=1000, si_status=127, si_utime=0, si_stime=0} ---
read(3, "Linux 5.9.10-200.fc33.x86_64 (bl"..., 65536) = 74
read(3, "\n", 65536)                    = 1
read(3, "Device            r/s     rkB/s "..., 65536) = 211
read(3, "    0.00      0.00     0.00   0."..., 65523) = 396
read(3, "    0.00      0.00     0.00   0."..., 65523) = 291
read(3, "     0.00    0.00      0.00     "..., 65430) = 266
read(3, "    0.00    0.00   0.00\nsdb     "..., 65362) = 221
read(3, "\n\n\n", 65339)                = 3
read(3, "", 65536)                      = 0
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=8879, si_uid=1000, si_status=0, si_utime=0, si_stime=0} ---
close(3)                                = 0
read(4, "", 65536)                      = 0
close(4)                                = 0
write(1, "Linux 5.9.10-200.fc33.x86_64 (bl"..., 1481Linux 5.9.10-200.fc33.x86_64 (black-tower)  11/27/2020      _x86_64_        (8 CPU)


Device            r/s     rkB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wkB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dkB/s   drqm/s  %drqm d_await dareq-sz     f/s f_await  aqu-sz  %util
dm-0             0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.00   0.00
dm-1             0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.00   0.00
dm-2             0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.00   0.00
nvme0n1          0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.00   0.00
sda              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.00   0.00
sdb              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.00   0.00


### SEPARATOR ###
) = 1481
exit_group(0)                           = ?
+++ exited with 0 +++

It appears that open_process_in uses the shell to run the command, whereas open_process_args_in is closer to the actual execve call. So you have to (a) specify the actual path to the program, and (b) specify the args correctly. That means the first (zeroth) arg must be the program name, viz.

  let i2 = Unix.open_process_args_in "/usr/bin/iostat" [| "/usr/bin/iostat"; "-dxy"; "1"; "1" |] in

Ah, look at that… Had I read the entire documentation snippet for the working function (open_process_in) I would have spotted this myself…

Thanks for clearing that up!

Well, uh, in your defense, the doc-comment for open_process_args_in doesn’t really make clear that the command is not interpreted by the shell, nor that the args need to be in a form suitable for passing directly to exec. I went straight to the source code, skipping the documentation; otherwise I wouldn’t have noticed it either.

ETA: It’s true that the doc-comment for open_process_in does specify that the command is interpreted by the shell. But this is still … well, not exactly as helpful as it sounds, since really what you’d want is for the other function to specify that it was a really low-level interface, etc.

Thanks, I created a PR for a documentation change here.

The zeroth argument doesn’t actually have to be the command that’s being executed; this does indeed work:

(* [printer i] copies every remaining line of the input channel [i] to
   standard output, one line each, and returns [()] once [input_line]
   raises [End_of_file].

   End of input is detected with [match ... with exception] rather than a
   surrounding [try]: the recursive call then sits outside any exception
   handler, so it is a genuine tail call and the stack does not grow with
   the number of lines read. Output still goes through [Printf.printf],
   so it is buffered exactly as before (no flush per line). *)
let rec printer i =
  match input_line i with
  | line ->
    Printf.printf "%s\n" line;
    printer i
  | exception End_of_file -> ()

(* Entry point: same experiment as the original program, but the second
   launch names the executable by its absolute path. Each child's output is
   printed in turn, with a marker line in between. *)
let () =
  (* One command string; per this thread, [open_process_in] has the shell
     interpret it. *)
  let i = Unix.open_process_in "iostat -dxy 1 1" in
  (* Absolute program path, but the argument array still begins with
     "-dxy", so that string occupies slot 0 of the array (the position the
     thread calls the zeroth argument) instead of a program name. *)
  let i2 = Unix.open_process_args_in "/usr/bin/iostat" [| "-dxy"; "1"; "1" |] in
  flush_all ();

  (* Read the first pipe to end-of-file and close it before reading the
     second one. *)
  printer i;
  close_in i;
  Printf.printf "### SEPARATOR ###\n";
  printer i2;
  close_in i2

Mmm … well, the problem with that is, you get different output than you get with what I provided in my previous comment. Basically, you -do- need some zeroth argument, so that your "-dxy" isn’t treated as the zeroth argument (and hence not processed as an argument).

Oh, I didn’t notice that at first but you are very much correct! Thanks again!

It’s a point, though - argument 0 is a command line keyword, however rarely used as such. Bash for example behaves differently depending on whether invoked as “sh” or “bash”, which of course it determines by inspecting argument 0.

I know that some programs use argv[0] as the indication of the filesystem location of the binary, from which they compute the install-tree for the package containing that program, to lookup libraries, config files, etc. I do it from time-to-time, for sure.

A more notable example of the 0’th argument being relevant is in single-binary, multiple symlink programs like busybox, where commands like cp, rm, find and others are all just symlinks to busybox, and busybox switches on argv[0] to decide what it should be doing.

I guess this is to say, most programs don’t care what you call them, but some do.