Numerical ranges and pattern matching

Maybe you could desugar this (if you don’t want to modify the actual compiler) into multiple octets and preserve exhaustiveness checking. Not that it would be particularly beautiful or efficient, but numerical range matching is such a common problem that everyone just implements it with `when` guards today, which is not very pretty either.

i.e. something like (with a 31-bit int type on x86):

match 123 with
| 0 .. 122 -> 1
| 10 -> .
| 123 .. 4096 -> 2

->

 (* shifted to account for GC bit *)
 (* Each tuple component is one byte of the scrutinee, least-significant
    byte first: 123 becomes ('\123', '\000', '\000', '\000') and 4096
    (= 16 * 256) becomes ('\000', '\016', '\000', '\000').  The ['\ddd']
    escapes are decimal. *)
match '\123',       '\000',      '\000', '\000' with
|  '\000'..'\122',  '\000',      '\000', '\000' -> 1
|     '\010',       '\000',      '\000', '\000' -> .
(* NOTE(review): the first alternative below puts '\123'..'\255' in the
   second (x256) column; for the range 123..255 it presumably belongs in
   the first column -- confirm.  Values 123..4095 are matched by the last
   alternative either way. *)
| (   '\000',    '\123'..'\255', '\000', '\000'
  |   '\000',       '\016',      '\000', '\000'
  |      _  ,    '\000'..'\015', '\000', '\000') -> 2

Incidentally, this invokes the exhaustiveness checker, which trips up somewhat: it takes 20 seconds to evaluate on my machine after outputting 1648 lines of missing cases. Perhaps that should be fixed upstream anyway, to make it abort earlier:

$ ocaml a.ml 
File "./a.ml", line 1, characters 11-283:
Warning 8: this pattern-matching is not exhaustive.
Here is an example of a case that is not matched:
(('\000', '\001', '\000', 'a')|('\000', '\001', 'a', _)|
('\000', '\002', '\000', 'a')|('\000', '\002', 'a', _)|
(* ... another 1646 examples ... *)

If you add a | _ -> 4 to my example it works as intended.

EDIT: Fixed off-by-one, added example:

(** [mymatch ch ch2 ch3 ch4] classifies the 32-bit value whose bytes are
    [ch] (least significant), [ch2], [ch3] and [ch4] (most significant):

    - [1] when the value is in [0 .. 122];
    - [2] when the value is in [123 .. 4096];
    - [3] otherwise.

    This is the byte-wise encoding of
    {[ match n with 0 .. 122 -> 1 | 10 -> . | 123 .. 4096 -> 2 | _ -> 3 ]}
    Character escapes of the form ['\ddd'] are decimal. *)
let mymatch ch ch2 ch3 ch4 = match ch, ch2, ch3, ch4 with
| '\000' .. '\122', '\000', '\000', '\000' -> 1
(* Refutation case: 10 is already covered by the range above, so this row
   can never be reached. *)
| '\010',           '\000', '\000', '\000' -> .
(* 123 .. 255: the range lives in the low byte.  It used to sit in the
   second column, which wrongly accepted 123*256 .. 255*256. *)
| ( '\123'..'\255', '\000', '\000', '\000'
  (* exactly 4096 = 16 * 256 *)
  | '\000', '\016',         '\000', '\000'
  (* any low byte with a second byte of 0 .. 15: everything up to 4095 that
     was not matched by an earlier row *)
  |  _    , '\000'..'\015', '\000', '\000') -> 2
| _ -> 3

(** [pack_int x] returns a freshly allocated little-endian encoding of [x]:
    byte [i] of the result holds bits [8*i .. 8*i+7] of [x].

    The result has [(Sys.int_size + 1) / 8] bytes (8 where [int] is 63 bits
    wide, 4 where it is 31 bits wide).  Every call allocates its own buffer,
    so earlier results are never overwritten by later calls. *)
let pack_int x =
  let int_bytes = (Sys.int_size + 1) / 8 in
  (* [lsr] is a logical shift, so the sign bit of a negative [x] is treated
     as ordinary data; [land 0xff] keeps the argument of [Char.chr] within
     0..255. *)
  Bytes.init int_bytes (fun i -> Char.chr ((x lsr (8 * i)) land 0xff))

(** [pack_int31 x] packs [x] with [pack_int].  When [Sys.int_size] is 63
    the result is a copy of the first four bytes of that encoding;
    otherwise the [pack_int] result is returned unchanged. *)
let pack_int31 x =
  let packed = pack_int x in
  match Sys.int_size with
  | 63 -> Bytes.sub packed 0 4
  | _ -> packed

(** [print x] packs [x] with [pack_int31], runs [mymatch] on its first four
    bytes, and writes the resulting integer followed by a newline to
    standard output. *)
let print x =
  let packed = pack_int31 x in
  let byte i = Bytes.get packed i in
  mymatch (byte 0) (byte 1) (byte 2) (byte 3) |> print_int;
  print_newline ()

(* Run the matcher on a few sample values, printing one result per line. *)
let () = List.iter print [0x3231; 10; 122; 123; 125]

this compiles to:

0000000000018d10 <camlA__mymatch_1002>:
   18d10:       48 83 f8 01             cmp    $0x1,%rax
   18d14:       74 0a                   je     18d20 <camlA__mymatch_1002+0x10>
   18d16:       48 3d f7 00 00 00       cmp    $0xf7,%rax
   18d1c:       7d 4a                   jge    18d68 <camlA__mymatch_1002+0x58>
   18d1e:       eb 2c                   jmp    18d4c <camlA__mymatch_1002+0x3c>
   18d20:       48 83 fb 21             cmp    $0x21,%rbx
   18d24:       74 1a                   je     18d40 <camlA__mymatch_1002+0x30>
   18d26:       48 81 fb f7 00 00 00    cmp    $0xf7,%rbx
   18d2d:       7c 1d                   jl     18d4c <camlA__mymatch_1002+0x3c>
   18d2f:       48 83 ff 01             cmp    $0x1,%rdi
   18d33:       75 17                   jne    18d4c <camlA__mymatch_1002+0x3c>
   18d35:       48 83 fe 01             cmp    $0x1,%rsi
   18d39:       74 49                   je     18d84 <camlA__mymatch_1002+0x74>
   18d3b:       eb 0f                   jmp    18d4c <camlA__mymatch_1002+0x3c>
   18d3d:       0f 1f 00                nopl   (%rax)
   18d40:       48 83 ff 01             cmp    $0x1,%rdi
   18d44:       75 06                   jne    18d4c <camlA__mymatch_1002+0x3c>
   18d46:       48 83 fe 01             cmp    $0x1,%rsi
   18d4a:       74 38                   je     18d84 <camlA__mymatch_1002+0x74>
   18d4c:       48 83 fb 01             cmp    $0x1,%rbx
   18d50:       75 16                   jne    18d68 <camlA__mymatch_1002+0x58>
   18d52:       48 83 ff 01             cmp    $0x1,%rdi
   18d56:       75 10                   jne    18d68 <camlA__mymatch_1002+0x58>
   18d58:       48 83 fe 01             cmp    $0x1,%rsi
   18d5c:       75 0a                   jne    18d68 <camlA__mymatch_1002+0x58>
   18d5e:       48 c7 c0 03 00 00 00    mov    $0x3,%rax
   18d65:       c3                      retq   
   18d66:       66 90                   xchg   %ax,%ax
   18d68:       48 83 fb 21             cmp    $0x21,%rbx
   18d6c:       7d 0e                   jge    18d7c <camlA__mymatch_1002+0x6c>
   18d6e:       48 83 ff 01             cmp    $0x1,%rdi
   18d72:       75 08                   jne    18d7c <camlA__mymatch_1002+0x6c>
   18d74:       48 83 fe 01             cmp    $0x1,%rsi
   18d78:       75 02                   jne    18d7c <camlA__mymatch_1002+0x6c>
   18d7a:       eb 08                   jmp    18d84 <camlA__mymatch_1002+0x74>
   18d7c:       48 c7 c0 07 00 00 00    mov    $0x7,%rax
   18d83:       c3                      retq   
   18d84:       48 c7 c0 05 00 00 00    mov    $0x5,%rax
   18d8b:       c3                      retq   
   18d8c:       0f 1f 40 00             nopl   0x0(%rax)
4 Likes