Maybe you could desugar this (if you don’t want to modify the actual compiler) into multiple octets and preserve exhaustiveness checking. Not that it would be particularly beautiful or efficient, but numerical range matching is such a common problem that everyone just implements it with `when`
guards today, which is not very pretty either.
I.e. something like this (with a 31-bit int type on x86):
(* Hypothetical syntax: OCaml range patterns only exist for [char] literals,
   so this integer-range match is what we would like to be able to write. *)
match 123 with
| 0 .. 122 -> 1
| 10 -> .
| 123 .. 4096 -> 2
would desugar to:
(* shifted to account for GC bit *)
(* The integer is spelled out as four bytes, least-significant byte first,
   so 123 is ('\123', '\000', '\000', '\000') and 4096 = 16 * 256 is
   ('\000', '\016', '\000', '\000'). *)
match '\123', '\000', '\000', '\000' with
| '\000'..'\122', '\000', '\000', '\000' -> 1
(* 10 already falls in the range above; [-> .] asserts this arm can never be
   selected. *)
| '\010', '\000', '\000', '\000' -> .
| ( '\123'..'\255', '\000', '\000', '\000'   (* 123 .. 255 *)
  | _ , '\001'..'\015', '\000', '\000'       (* 256 .. 4095 *)
  | '\000', '\016', '\000', '\000') -> 2     (* 4096 *)
Incidentally, this makes the exhaustiveness checker struggle: it takes 20 seconds to evaluate on my machine and outputs 1648 lines of missing cases. Perhaps that should be fixed upstream anyway, so that it aborts earlier:
$ ocaml a.ml
File "./a.ml", line 1, characters 11-283:
Warning 8: this pattern-matching is not exhaustive.
Here is an example of a case that is not matched:
(('\000', '\001', '\000', 'a')|('\000', '\001', 'a', _)|
('\000', '\002', '\000', 'a')|('\000', '\002', 'a', _)|
(* ... another 1646 examples ... *)
If you add a final `| _ -> 4` case to my example, it works as intended.
EDIT: Fixed off-by-one, added example:
(** [mymatch ch ch2 ch3 ch4] classifies the 32-bit value whose bytes are
    [ch] (least significant) through [ch4] (most significant):
    - [1] for 0 .. 122,
    - [2] for 123 .. 4096,
    - [3] for anything else.

    This is the byte-wise desugaring of an integer range match: it relies
    only on [char] range patterns, with no [when] guards. *)
let mymatch ch ch2 ch3 ch4 = match ch, ch2, ch3, ch4 with
  | '\000' .. '\122', '\000', '\000', '\000' -> 1
  (* 10 already falls in the range above; [-> .] asserts this arm can never
     be selected. *)
  | '\010', '\000', '\000', '\000' -> .
  (* The three alternatives are disjoint and together cover 123 .. 4096, so
     the arm does not depend on the order of the cases around it. *)
  | ( '\123' .. '\255', '\000', '\000', '\000'   (* 123 .. 255 *)
    | _, '\001' .. '\015', '\000', '\000'        (* 256 .. 4095 *)
    | '\000', '\016', '\000', '\000' ) -> 2      (* 4096 *)
  | _ -> 3
(** [pack_int x] returns the bytes of [x], least-significant byte first.

    The result has [(Sys.int_size + 1) / 8] bytes (8 when [Sys.int_size] is
    63, 4 when it is 31). A fresh buffer is allocated on every call, so the
    result of an earlier call is never overwritten by a later one. Bytes are
    extracted with a logical shift, so a negative [x] yields the bit pattern
    of its native representation. *)
let pack_int x =
  let int_bytes = (Sys.int_size + 1) / 8 in
  (* Byte [i] holds bits [8*i .. 8*i+7]. Indexing from 0 up to
     [int_bytes - 1] keeps this correct for any native int width instead of
     assuming an 8-byte buffer. *)
  Bytes.init int_bytes (fun i -> Char.chr ((x lsr (8 * i)) land 0xff))
(** [pack_int31 x] packs [x] with [pack_int]. When [Sys.int_size] is 63 only
    the first four bytes of that result are kept (as a copy); for any other
    int size the [pack_int] result is returned unchanged. *)
let pack_int31 x =
  let packed = pack_int x in
  match Sys.int_size with
  | 63 -> Bytes.sub packed 0 4
  | _ -> packed
(** [print x] packs [x] with [pack_int31], feeds the first four bytes to
    [mymatch], and writes the resulting integer to standard output followed
    by a newline. *)
let print x =
  let packed = pack_int31 x in
  let byte i = Bytes.get packed i in
  let verdict = mymatch (byte 0) (byte 1) (byte 2) (byte 3) in
  print_int verdict;
  print_newline ()
(* Demo: classify a handful of sample values, one result per line, in the
   order listed. *)
let () = List.iter print [ 0x3231; 10; 122; 123; 125 ]
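With ocamlopt on x86-64, `mymatch` compiles to the following. Immediates are tagged OCaml integers (2n+1), so $0xf7 is 123, $0x21 is 16, and the return values $0x3, $0x5 and $0x7 are 1, 2 and 3: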
0000000000018d10 <camlA__mymatch_1002>:
18d10: 48 83 f8 01 cmp $0x1,%rax
18d14: 74 0a je 18d20 <camlA__mymatch_1002+0x10>
18d16: 48 3d f7 00 00 00 cmp $0xf7,%rax
18d1c: 7d 4a jge 18d68 <camlA__mymatch_1002+0x58>
18d1e: eb 2c jmp 18d4c <camlA__mymatch_1002+0x3c>
18d20: 48 83 fb 21 cmp $0x21,%rbx
18d24: 74 1a je 18d40 <camlA__mymatch_1002+0x30>
18d26: 48 81 fb f7 00 00 00 cmp $0xf7,%rbx
18d2d: 7c 1d jl 18d4c <camlA__mymatch_1002+0x3c>
18d2f: 48 83 ff 01 cmp $0x1,%rdi
18d33: 75 17 jne 18d4c <camlA__mymatch_1002+0x3c>
18d35: 48 83 fe 01 cmp $0x1,%rsi
18d39: 74 49 je 18d84 <camlA__mymatch_1002+0x74>
18d3b: eb 0f jmp 18d4c <camlA__mymatch_1002+0x3c>
18d3d: 0f 1f 00 nopl (%rax)
18d40: 48 83 ff 01 cmp $0x1,%rdi
18d44: 75 06 jne 18d4c <camlA__mymatch_1002+0x3c>
18d46: 48 83 fe 01 cmp $0x1,%rsi
18d4a: 74 38 je 18d84 <camlA__mymatch_1002+0x74>
18d4c: 48 83 fb 01 cmp $0x1,%rbx
18d50: 75 16 jne 18d68 <camlA__mymatch_1002+0x58>
18d52: 48 83 ff 01 cmp $0x1,%rdi
18d56: 75 10 jne 18d68 <camlA__mymatch_1002+0x58>
18d58: 48 83 fe 01 cmp $0x1,%rsi
18d5c: 75 0a jne 18d68 <camlA__mymatch_1002+0x58>
18d5e: 48 c7 c0 03 00 00 00 mov $0x3,%rax
18d65: c3 retq
18d66: 66 90 xchg %ax,%ax
18d68: 48 83 fb 21 cmp $0x21,%rbx
18d6c: 7d 0e jge 18d7c <camlA__mymatch_1002+0x6c>
18d6e: 48 83 ff 01 cmp $0x1,%rdi
18d72: 75 08 jne 18d7c <camlA__mymatch_1002+0x6c>
18d74: 48 83 fe 01 cmp $0x1,%rsi
18d78: 75 02 jne 18d7c <camlA__mymatch_1002+0x6c>
18d7a: eb 08 jmp 18d84 <camlA__mymatch_1002+0x74>
18d7c: 48 c7 c0 07 00 00 00 mov $0x7,%rax
18d83: c3 retq
18d84: 48 c7 c0 05 00 00 00 mov $0x5,%rax
18d8b: c3 retq
18d8c: 0f 1f 40 00 nopl 0x0(%rax)