A new List.map that is both stack-safe and fast

Sure, I wasn’t trying to say that a compiler should be able to optimize a non-tail recursive code into the tail-recursive. The problem is that so far OCaml didn’t provide the loop unrolling. And yes, the loop unrolling may be even applied to the List.map, here is the C version of the map function:

char **map(char *xs, char **(f (char *))) {
    if (xs) {
        char **y = cons();
        y[0] = f(xs[0]);
        char ** ys = map(xs[1], f);
        y[1] = ys;
        return y;
    }
    return 0;
}

and here is the gcc output, with cons inlined 8 times:

map:
.LFB0:
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	movq	%rdi, %r12
	pushq	%rbp
	pushq	%rbx
	subq	$56, %rsp
	testq	%rdi, %rdi
	je	.L11
	xorl	%eax, %eax
	movq	%rsi, %rbx
	call	cons
	movsbq	(%r12), %rdi
	movslq	%eax, %rbp
	call	*%rbx
	movq	%rax, 0(%rbp)
	movsbq	1(%r12), %r13
	testq	%r13, %r13
	je	.L12
	xorl	%eax, %eax
	call	cons
	movsbq	0(%r13), %rdi
	movslq	%eax, %r12
	call	*%rbx
	movq	%rax, (%r12)
	movsbq	1(%r13), %r14
	testq	%r14, %r14
	je	.L13
	xorl	%eax, %eax
	call	cons
	movsbq	(%r14), %rdi
	movslq	%eax, %r13
	call	*%rbx
	movq	%rax, 0(%r13)
	movsbq	1(%r14), %r15
	testq	%r15, %r15
	je	.L14
	xorl	%eax, %eax
	call	cons
	movsbq	(%r15), %rdi
	movslq	%eax, %r14
	call	*%rbx
	movq	%rax, (%r14)
	movsbq	1(%r15), %r15
	testq	%r15, %r15
	je	.L15
	xorl	%eax, %eax
	call	cons
	movsbq	(%r15), %rdi
	cltq
	movq	%rax, 8(%rsp)
	call	*%rbx
	movq	8(%rsp), %rsi
	movq	%rax, (%rsi)
	movsbq	1(%r15), %r15
	testq	%r15, %r15
	je	.L16
	xorl	%eax, %eax
	call	cons
	movsbq	(%r15), %rdi
	cltq
	movq	%rax, 16(%rsp)
	call	*%rbx
	movq	16(%rsp), %rcx
	movq	%rax, (%rcx)
	movsbq	1(%r15), %rdi
	testq	%rdi, %rdi
	movq	%rdi, 24(%rsp)
	je	.L17
	xorl	%eax, %eax
	call	cons
	movq	24(%rsp), %r9
	cltq
	movq	%rax, %r15
	movq	%rax, 32(%rsp)
	movsbq	(%r9), %rdi
	call	*%rbx
	movq	24(%rsp), %r11
	movq	%rax, (%r15)
	movsbq	1(%r11), %rax
	testq	%rax, %rax
	movq	%rax, 24(%rsp)
	je	.L18
	xorl	%eax, %eax
	call	cons
	movq	24(%rsp), %rsi
	cltq
	movq	%rax, %r15
	movq	%rax, 40(%rsp)
	movsbq	(%rsi), %rdi
	call	*%rbx
	movq	24(%rsp), %rcx
	movq	%rax, (%r15)
	movsbq	1(%rcx), %rdi
	testq	%rdi, %rdi
	movq	%rdi, 24(%rsp)
	je	.L19
	xorl	%eax, %eax
	call	cons
	movq	24(%rsp), %r9
	cltq
	movq	%rax, %r15
	movsbq	(%r9), %rdi
	call	*%rbx
	movq	24(%rsp), %r11
	movq	%rax, (%r15)
	movq	%rbx, %rsi
	movsbq	1(%r11), %rdi
	call	map
	movq	%r15, %r8
	movq	%rax, 8(%r15)
.L10:
	movq	40(%rsp), %rax
	movq	%r8, 8(%rax)
	movq	%rax, %rbx
.L9:
	movq	32(%rsp), %rsi
	movq	%rbx, 8(%rsi)
	movq	%rsi, %r8
.L8:
	movq	16(%rsp), %rdx
	movq	%r8, 8(%rdx)
.L7:
	movq	8(%rsp), %rcx
	movq	%rdx, 8(%rcx)
	movq	%rcx, %rax
.L6:
	movq	%rax, 8(%r14)
.L5:
	movq	%r14, 8(%r13)
.L4:
	movq	%r13, 8(%r12)
.L3:
	movq	%r12, 8(%rbp)
	movq	%rbp, %rax
.L2:
	addq	$56, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	ret
1 Like