So I did look at the βcontainersβ library first, which I understood more or less replaces βbatteriesβ.
I found containerβs implementation a little too complicated for my current OCaml knowledge.
Both batteries and containers donβt return the key on which we group things, so these implementations arenβt very useful to me.
Hereβs a recap from a few languages providing a built-in group_by function:
Ruby:
irb(main):001:0> lst = [%w[a b c], %w[d e], %w[f g h], %w[d e], %w[i j k l], %w[m n], %w[o]]
irb(main):002:0> lst.group_by(&:length)
=> {3=>[["a", "b", "c"], ["f", "g", "h"]], 2=>[["d", "e"], ["d", "e"], ["m", "n"]], 4=>[["i", "j", "k", "l"]], 1=>[["o"]]}
---
Clojure:
user=> (def lst '((a b c) (d e) (f g h) (d e) (i j k l) (m n) (o)))
{3 [(a b c) (f g h)], 2 [(d e) (d e) (m n)], 4 [(i j k l)], 1 [(o)]}
user=> (group-by count lst)
{3 [(a b c) (f g h)], 2 [(d e) (d e) (m n)], 4 [(i j k l)], 1 [(o)]}
---
Elixir:
iex(1)> lst = [['a', 'b', 'c'], ['d', 'e'], ['f', 'g', 'h'], ['d', 'e'], ['i', 'j', 'k', 'l'], ['m', 'n'], ['o']]
[
['a', 'b', 'c'],
['d', 'e'],
['f', 'g', 'h'],
['d', 'e'],
['i', 'j', 'k', 'l'],
['m', 'n'],
['o']
]
iex(2)> lst |> Enum.group_by(&length(&1))
%{
1 => [['o']],
2 => [['d', 'e'], ['d', 'e'], ['m', 'n']],
3 => [['a', 'b', 'c'], ['f', 'g', 'h']],
4 => [['i', 'j', 'k', 'l']]
}
---
F#:
> let lst = [['a';'b';'c'];['d';'e'];['f';'g';'h'];['d';'e'];['i';'j';'k';'l'];['m';'n'];['o']];;
val lst: char list list =
[['a'; 'b'; 'c']; ['d'; 'e']; ['f'; 'g'; 'h']; ['d'; 'e'];
['i'; 'j'; 'k'; 'l']; ['m'; 'n']; ['o']]
> List.groupBy List.length lst;;
val it: (int * char list list) list =
[(3, [['a'; 'b'; 'c']; ['f'; 'g'; 'h']]);
(2, [['d'; 'e']; ['d'; 'e']; ['m'; 'n']]); (4, [['i'; 'j'; 'k'; 'l']]);
(1, [['o']])]
---
Java:
jshell> var lst = List.of(List.of('a','b','c'), List.of('d','e'), List.of('f','g','h'), List.of('d','e'), List.of('i','j','k','l'), List.of('m', 'n'), List.of('o'))
lst ==> [[a, b, c], [d, e], [f, g, h], [d, e], [i, j, k, l], [m, n], [o]]
jshell> lst.stream().collect(Collectors.groupingBy(List::size))
$13 ==> {1=[[o]], 2=[[d, e], [d, e], [m, n]], 3=[[a, b, c], [f, g, h]], 4=[[i, j, k, l]]}
---
OCaml batteries, no key :(
β( 08:31:06 )β< command 0 >ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ{ counter: 0 }β
utop # #require "batteries";;
β( 08:31:07 )β< command 1 >ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ{ counter: 0 }β
utop # let lst = [['a';'b';'c'];['d';'e'];['f';'g';'h'];['d';'e'];['i';'j';'k';'l'];['m';'n'];['o']];;
val lst : char list list =
[['a'; 'b'; 'c']; ['d'; 'e']; ['f'; 'g'; 'h']; ['d'; 'e'];
['i'; 'j'; 'k'; 'l']; ['m'; 'n']; ['o']]
β( 08:31:16 )β< command 2 >ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ{ counter: 0 }β
utop # BatList.group (fun a b -> compare (List.length a) (List.length b)) lst;;
- : char list list list =
[[['o']]; [['d'; 'e']; ['d'; 'e']; ['m'; 'n']];
[['a'; 'b'; 'c']; ['f'; 'g'; 'h']]; [['i'; 'j'; 'k'; 'l']]]
Hereβs the final version I settled on, for ref (K_Nβs impl slightly tweaked at the end):
let group_by f lst =
let tbl = Hashtbl.create 1024 in
let rec aux = function
| [] -> ()
| h :: t ->
let key = f h in
let curr = Hashtbl.find_opt tbl key |> Option.value ~default:[] in
()
; Hashtbl.replace tbl key (h :: curr)
; aux t
in
()
; aux lst
; Hashtbl.fold (fun k v acc -> (k, v) :: acc) tbl []
;;