Dune Cram tests with several file outputs

Well, in the end I wrote a Python (but could be OCaml) script to help me keep using Cram tests with external outputs. If there is a more intelligent way to do this, please tell me.

The idea is:

  • I add diff commands to run.t; after dune runtest && dune promote, the diffs will be added directly to run.t itself as large blocks of >-prefixed lines;
  • I then run this re-promote script to extract these diffs to the oracle files themselves;
  • Finally, the re-promote script re-runs dune runtest && dune promote to get run.t to become pristine once again.

This requires running the tests at least twice (currently they are run three times, because re-promote does some extra checking), but it saves me from writing any dune rules at all. Also, diff -N produces output that is usable even if the oracle files do not exist yet, so I don’t have to create them manually with touch. In case someone might be interested…

#!/usr/bin/env python

from pathlib import Path
import re
import subprocess
import sys
import tempfile

# Command-line handling: the script needs one argument, the Cram test
# directory (named '<something>.t').
if len(sys.argv) <= 1:
    sys.exit(f"usage: {sys.argv[0]} testdir.t")

testdir = Path(sys.argv[1])
testfile = testdir / "run.t"
# dune alias for this test: the directory path with the '.t' suffix dropped
build_target = testdir.parent / testdir.stem

if not testdir.exists():
    sys.exit(f"error: test directory not found: {testdir}")
elif not testdir.is_dir():
    sys.exit(f"error: not a test directory: {testdir}")
elif not testfile.exists():
    sys.exit(f"error: test file not found: {testfile}")

# Sanity check: 'dune build @<target>' must succeed before we start, i.e. the
# diff output is expected to have been promoted into run.t already. Abort
# otherwise. dune's own stderr is discarded; only our message is shown.
proc = subprocess.run(
    ["dune", "build", f"@{build_target}"],
    stderr=subprocess.DEVNULL,
    check=False,
)
if proc.returncode:
    sys.exit(
        f"error: 'dune build @{build_target}' returned non-zero ({proc.returncode}). "
        "Make sure to run 'dune promote' before running this script."
    )


def is_end_of_previous_oracle(line):
    """Return True when *line* cannot belong to the output of the previous command.

    Command output in a Cram test is indented by two spaces. A line without
    that indent is a comment, and a line starting with ``"  $"`` is a new
    command; both mean the previous oracle has finished.
    """
    return not line.startswith("  ") or line.startswith("  $")


def is_start_of_new_diff(line):
    """Return True when *line* is a Cram command line that invokes ``diff``.

    The command word must be exactly ``diff``: it has to be followed by
    whitespace or the end of the line. A plain prefix test would also accept
    unrelated commands such as ``diffstat`` or ``diff3``.
    """
    import re  # local import keeps this helper self-contained

    return re.match(r"  \$ diff(\s|$)", line) is not None


# we assume no path/filename contains spaces
# Matches a Cram 'diff' command line. Options are optional and may be combined
# ("-N", "-Nu"); group 1 is the first file operand (the file that gets
# patched), group 2 the second. The first operand must not start with '-', so
# unsupported long options are rejected instead of being taken as a filename.
re_diff_begin = re.compile(r"  \$ diff +(?:-[a-zA-Z0-9]+ +)*([^\s-]\S*) +(\S+)")
patched_files = 0
diff_lines: list[str] = []  # diff output collected for the current command
dest = None  # first operand of the current diff command
collecting = False  # True while reading the output of a diff command

# Read run.t completely up front so it is closed before any 'patch' runs.
with open(testfile, "r", encoding="utf-8") as f:
    test_lines = f.readlines()

for line in test_lines:
    if is_end_of_previous_oracle(line):
        # Collected output that was never closed by a "[1]" exit-code line
        # means the test file is not in the shape this script expects.
        if diff_lines:
            sys.exit("error: non-empty diff lines must imply non-zero diff exit code")
        collecting = False
    if is_start_of_new_diff(line):
        m = re_diff_begin.match(line)
        if m is None:
            sys.exit(f"error: diff command not matching expected regex: {line.rstrip()}")
        dest = m.group(1)
        collecting = True
    elif collecting:
        if line.rstrip() == "  [1]":
            # end of test oracle, diff returned non-zero
            collecting = False
            assert dest is not None, "dest must have been set"
            # Write the diff through the temporary file's own handle; the
            # context manager removes the file even if 'patch' fails.
            with tempfile.NamedTemporaryFile(
                "w", encoding="utf-8", prefix="re-promote_", suffix=".diff"
            ) as tmp:
                tmp.writelines(diff_lines)
                tmp.flush()  # make the content visible to 'patch'
                subprocess.check_output(["patch", dest, tmp.name], cwd=testdir)
            print(f"applied patches to: {testdir}/{dest}")
            patched_files += 1
            diff_lines = []
            continue
        diff_lines.append(line[2:])  # remove spaces added to Cram test oracle

if diff_lines and collecting:
    sys.exit(
        "error: file should have finished with either successful (empty) diff "
        f"or a non-empty diff exit code. diff_lines: {diff_lines}, collecting: {collecting}"
    )

# Report how many oracle files were patched, then rebuild and promote once more.
print(
    f"re-promoted {patched_files} oracle(s).",
    f"re-running 'dune build @{build_target} && dune promote'",
    sep="\n",
)
rebuild_cmd = ["dune", "build", f"@{build_target}"]
# This build will fail if an update was expected, so its return code is
# deliberately ignored and dune's stderr is discarded.
subprocess.run(rebuild_cmd, stderr=subprocess.DEVNULL, check=False)
subprocess.check_output(["dune", "promote"])