Sure, I wasn’t trying to say that a compiler should be able to optimize a non-tail recursive code into the tail-recursive. The problem is that so far OCaml didn’t provide the loop unrolling. And yes, the loop unrolling may be even applied to the List.map, here is the C version of the map function:
char **map(char *xs, char **(f (char *))) {
if (xs) {
char **y = cons();
y[0] = f(xs[0]);
char ** ys = map(xs[1], f);
y[1] = ys;
return y;
}
return 0;
}
and here is the gcc output, with cons
inlined 8 times:
map:
.LFB0:
pushq %r15
pushq %r14
pushq %r13
pushq %r12
movq %rdi, %r12
pushq %rbp
pushq %rbx
subq $56, %rsp
testq %rdi, %rdi
je .L11
xorl %eax, %eax
movq %rsi, %rbx
call cons
movsbq (%r12), %rdi
movslq %eax, %rbp
call *%rbx
movq %rax, 0(%rbp)
movsbq 1(%r12), %r13
testq %r13, %r13
je .L12
xorl %eax, %eax
call cons
movsbq 0(%r13), %rdi
movslq %eax, %r12
call *%rbx
movq %rax, (%r12)
movsbq 1(%r13), %r14
testq %r14, %r14
je .L13
xorl %eax, %eax
call cons
movsbq (%r14), %rdi
movslq %eax, %r13
call *%rbx
movq %rax, 0(%r13)
movsbq 1(%r14), %r15
testq %r15, %r15
je .L14
xorl %eax, %eax
call cons
movsbq (%r15), %rdi
movslq %eax, %r14
call *%rbx
movq %rax, (%r14)
movsbq 1(%r15), %r15
testq %r15, %r15
je .L15
xorl %eax, %eax
call cons
movsbq (%r15), %rdi
cltq
movq %rax, 8(%rsp)
call *%rbx
movq 8(%rsp), %rsi
movq %rax, (%rsi)
movsbq 1(%r15), %r15
testq %r15, %r15
je .L16
xorl %eax, %eax
call cons
movsbq (%r15), %rdi
cltq
movq %rax, 16(%rsp)
call *%rbx
movq 16(%rsp), %rcx
movq %rax, (%rcx)
movsbq 1(%r15), %rdi
testq %rdi, %rdi
movq %rdi, 24(%rsp)
je .L17
xorl %eax, %eax
call cons
movq 24(%rsp), %r9
cltq
movq %rax, %r15
movq %rax, 32(%rsp)
movsbq (%r9), %rdi
call *%rbx
movq 24(%rsp), %r11
movq %rax, (%r15)
movsbq 1(%r11), %rax
testq %rax, %rax
movq %rax, 24(%rsp)
je .L18
xorl %eax, %eax
call cons
movq 24(%rsp), %rsi
cltq
movq %rax, %r15
movq %rax, 40(%rsp)
movsbq (%rsi), %rdi
call *%rbx
movq 24(%rsp), %rcx
movq %rax, (%r15)
movsbq 1(%rcx), %rdi
testq %rdi, %rdi
movq %rdi, 24(%rsp)
je .L19
xorl %eax, %eax
call cons
movq 24(%rsp), %r9
cltq
movq %rax, %r15
movsbq (%r9), %rdi
call *%rbx
movq 24(%rsp), %r11
movq %rax, (%r15)
movq %rbx, %rsi
movsbq 1(%r11), %rdi
call map
movq %r15, %r8
movq %rax, 8(%r15)
.L10:
movq 40(%rsp), %rax
movq %r8, 8(%rax)
movq %rax, %rbx
.L9:
movq 32(%rsp), %rsi
movq %rbx, 8(%rsi)
movq %rsi, %r8
.L8:
movq 16(%rsp), %rdx
movq %r8, 8(%rdx)
.L7:
movq 8(%rsp), %rcx
movq %rdx, 8(%rcx)
movq %rcx, %rax
.L6:
movq %rax, 8(%r14)
.L5:
movq %r14, 8(%r13)
.L4:
movq %r13, 8(%r12)
.L3:
movq %r12, 8(%rbp)
movq %rbp, %rax
.L2:
addq $56, %rsp
popq %rbx
popq %rbp
popq %r12
popq %r13
popq %r14
popq %r15
ret