I looked at CamlinternalFormat and its JavaScript output. The result in fact seems rather natural: this module has more code than most other modules in the standard library, because we support many different formatting features (padding, conversion, runtime parsing of format strings, etc.).
$ (cd .../stdlib/ && wc -c *.ml | sort -n -r | head -n 10)
690517 total
124679 camlinternalFormat.ml
54300 scanf.ml
50750 format.ml
29125 camlinternalFormatBasics.ml
25948 bytes.ml
22515 ephemeron.ml
21871 stdlib.ml
19469 set.ml
19103 camlinternalOO.ml
Looking at the JavaScript output, I believe that js_of_ocaml could save some space by optimizing pattern-matching for space. (I'm showing the non-minified outputs below, but the same things occur in the minified output, just with shorter variable names.)
(1) There are several cases of patterns that only return constants, and could be turned into a table lookup. For example:
switch(fconv[2])
{case 0:return 102;
case 1:return 101;
case 2:return 69;
case 3:return 103;
case 4:return 71;
case 5:return cF;
case 6:return 104;
case 7:return 72;
default:return 70}
(2) Sometimes only some of the cases are constant, or parts of the switch are affine, or a "constructor reuse" detection could notice a common opportunity to share code; these would require more sophisticated analyses in the compiler. For example:
switch(ty2[0])
{case 10:break;
case 11:switch$0 = 1;break;
case 12:switch$0 = 2;break;
case 13:switch$0 = 3;break;
case 14:switch$0 = 4;break;
case 8:switch$0 = 5;break;
case 9:switch$0 = 6;break;
default:throw [0,Assert_failure,_b_]}
switch(param[0])
{case 0:var rest=param[1];return [0,symm(rest)];
case 1:var rest$0=param[1];return [1,symm(rest$0)];
case 2:var rest$1=param[1];return [2,symm(rest$1)];
case 3:var rest$2=param[1];return [3,symm(rest$2)];
case 4:var rest$3=param[1];return [4,symm(rest$3)];
case 5:var rest$4=param[1];return [5,symm(rest$4)];
case 6:var rest$5=param[1];return [6,symm(rest$5)];
case 7:var rest$6=param[1];return [7,symm(rest$6)];
case 8:var rest$7=param[2],ty=param[1];return [8,ty,symm(rest$7)];
case 9:
var rest$8=param[3],ty2=param[2],ty1=param[1];
return [9,ty2,ty1,symm(rest$8)];
case 10:var rest$9=param[1];return [10,symm(rest$9)];
case 11:var rest$10=param[1];return [11,symm(rest$10)];
case 12:var rest$11=param[1];return [12,symm(rest$11)];
case 13:var rest$12=param[1];return [13,symm(rest$12)];
default:var rest$13=param[1];return [14,symm(rest$13)]}}
(3) Sometimes the right-hand sides of a pattern could be shared, but they are not:
switch(ign[0])
{case 0:return type_ignored_param_one(ign,rest,fmtty);
case 1:return type_ignored_param_one(ign,rest,fmtty);
case 2:return type_ignored_param_one(ign,rest,fmtty);
case 3:return type_ignored_param_one(ign,rest,fmtty);
case 4:return type_ignored_param_one(ign,rest,fmtty);
case 5:return type_ignored_param_one(ign,rest,fmtty);
case 6:return type_ignored_param_one(ign,rest,fmtty);
case 7:return type_ignored_param_one(ign,rest,fmtty);
case 8:
var sub_fmtty$2=ign[2],pad_opt$1=ign[1];
return type_ignored_param_one
([8,pad_opt$1,sub_fmtty$2],rest,fmtty);
case 9:
var
sub_fmtty$3=ign[2],
pad_opt$2=ign[1],
_dz_=type_ignored_format_substituti(sub_fmtty$3,rest,fmtty),
match$35=_dz_[2],
fmtty$22=match$35[2],
fmt$22=match$35[1],
sub_fmtty$4=_dz_[1];
return [0,[23,[9,pad_opt$2,sub_fmtty$4],fmt$22],fmtty$22];
case 10:return type_ignored_param_one(ign,rest,fmtty);
default:return type_ignored_param_one(ign,rest,fmtty)}
switch(fmtty[0])
{case 0:
var rest=fmtty[1];
return function(param){return make_from_fmtty(k,acc,rest,fmt)};
case 1:
var rest$0=fmtty[1];
return function(param){return make_from_fmtty(k,acc,rest$0,fmt)};
case 2:
var rest$1=fmtty[1];
return function(param){return make_from_fmtty(k,acc,rest$1,fmt)};
case 3:
var rest$2=fmtty[1];
return function(param){return make_from_fmtty(k,acc,rest$2,fmt)};
case 4:
var rest$3=fmtty[1];
return function(param){return make_from_fmtty(k,acc,rest$3,fmt)};
[...]
}
(4) Some generated switch code is a bit weird: it defines unused variables and then uses `continue`.
switch(ign[0])
{case 0:var fmtty$0=rest$18;continue;
case 1:var fmtty$0=rest$18;continue;
case 2:var fmtty$0=rest$18;continue;
case 3:var fmtty$0=rest$18;continue;
case 4:var fmtty$0=rest$18;continue;
case 5:var fmtty$0=rest$18;continue;
case 6:var fmtty$0=rest$18;continue;
case 7:var fmtty$0=rest$18;continue;
case 8:var fmtty$0=rest$18;continue;
case 9:
var fmtty$5=ign[2],_dF_=fmtty_of_fmt(rest$18);
return caml_call2(CamlinternalFormatBasics[1],fmtty$5,_dF_);
case 10:var fmtty$0=rest$18;continue;
default:var fmtty$0=rest$18;continue}
Some of these things could be done (or tuned) in the upstream compiler: the pattern-matcher could try harder to generate more compact code in some cases. Alternatively, they could be changed in js_of_ocaml, which has its own analysis and re-optimization pipeline, if I understand correctly.
That said, while there certainly would be some gains, the total reduction in size would probably be small, in the 5-15% range on this file.