// π in assembly (spigot algorithm)
// pi_spigot.s - calculates Pi using a spigot algorithm
// as an array of n digits in base 10000.
// http://mathworld.wolfram.com/SpigotAlgorithm.html
//
// x86-64/SSE3 for Linux, Intel, GNU assembler, gcc
//
// assemble: as pi_spigot.s -o pi_spigot.o
// link: gcc -o pi_spigot pi_spigot.o
// example run: ./pi_spigot 100
// output: 3.14159265358979323846264338327950288419716939937510582097494459230 ...
// ... 78164062862089986280348253421170679
//
// Read-only printf format strings used by print().
	.section	.rodata
// Format for the leading cell: the integer part followed by the decimal point.
.LC0:
	.asciz	"%d."
// Format for each following cell: four zero-padded decimal digits.
.LC1:
	.asciz	"%04d"
//-----------------------------------------------------------------------
// void print(const unsigned short *cells, int n)
// ABI:   System V AMD64
// In:    rdi = cells (array of 16-bit values), esi = n (number of cells)
// Out:   nothing
// Does:  printf("%d.", cells[1]); then printf("%04d", cells[i]) for
//        i = 2 .. n-2; then a newline.  cells[0] and cells[n-1] are
//        never printed.
// Stack: 32 bytes of locals below rbp; rsp is 16-byte aligned at every call.
//        -24(%rbp) = cells, -28(%rbp) = n, -4(%rbp) = i
//-----------------------------------------------------------------------
	.text
	.globl	print
	.type	print, @function
print:
.LFB0:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	subq	$32, %rsp
	movq	%rdi, -24(%rbp)			/* cells */
	movl	%esi, -28(%rbp)			/* n */

	movq	-24(%rbp), %rax
	movzwl	2(%rax), %esi			/* cells[1], zero-extended */
	leaq	.LC0(%rip), %rdi		/* RIP-relative: links as PIE */
	xorl	%eax, %eax			/* variadic call: no vector args */
	call	printf@PLT

	movl	$2, -4(%rbp)			/* i = 2 */
	jmp	.Lprint_check
.Lprint_cell:
	movslq	-4(%rbp), %rax
	movq	-24(%rbp), %rdx
	movzwl	(%rdx,%rax,2), %esi		/* cells[i] */
	leaq	.LC1(%rip), %rdi
	xorl	%eax, %eax			/* variadic call: no vector args */
	call	printf@PLT
	addl	$1, -4(%rbp)			/* ++i */
.Lprint_check:
	movl	-28(%rbp), %eax
	subl	$1, %eax
	cmpl	-4(%rbp), %eax
	jg	.Lprint_cell			/* loop while n - 1 > i (signed) */

	movl	$10, %edi			/* '\n' */
	call	putchar@PLT
	leave
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE0:
	.size	print, .-print
//-----------------------------------------------------------------------
// int main(int argc, char **argv)
// ABI:   System V AMD64
// In:    edi = argc, rsi = argv; argv[1] (optional) = decimal digit count
// Out:   eax = 0 on success, 1 if the digit count is unusable or the
//        cell buffer cannot be allocated
//
// Roughly equivalent C:
//     n  = argc > 1 ? (atoi(argv[1]) + 3) / 4 + 3 : 253;
//     if (n < 2) return 1;
//     pi = calloc(n, 2); if (!pi) { perror(NULL); return 1; }
//     pi[1] = 4;
//     for (i = (int)(n * 13.288); i > 0; --i) {
//         t = 0;
//         for (j = n - 1; j >= 0; --j) {
//             t += pi[j] * i;  pi[j] = t % 10000;  t /= 10000;
//         }
//         d.quot = d.rem = 0;
//         for (j = 0; j < n; ++j) {
//             d = div(pi[j] + d.rem * 10000, 2 * i + 1);  pi[j] = d.quot;
//         }
//         pi[1] += 2;
//     }
//     print(pi, n); free(pi); return 0;
//
// Frame (offsets from rbp), 64 bytes, rsp 16-byte aligned at every call:
//     -20 n      -24 i      -28 j      -32 t (carry)
//     -40 pi     -48 d.quot -44 d.rem  -52 argc     -64 argv
//
// NOTE(review): all intermediate products are 32-bit signed, so very
// large digit counts can overflow silently; this is not guarded here.
//-----------------------------------------------------------------------
	.globl	main
	.type	main, @function
main:
.LFB1:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	subq	$64, %rsp
	movl	%edi, -52(%rbp)			/* argc */
	movq	%rsi, -64(%rbp)			/* argv */

	movl	$253, %eax			/* default cell count */
	cmpl	$1, -52(%rbp)
	jle	.Lmain_have_n
	movq	-64(%rbp), %rax
	movq	8(%rax), %rdi			/* argv[1] */
	call	atoi@PLT
	addl	$3, %eax
	leal	3(%rax), %edx			/* bias so the shift truncates toward zero */
	testl	%eax, %eax
	cmovs	%edx, %eax
	sarl	$2, %eax			/* (digits + 3) / 4 */
	addl	$3, %eax
.Lmain_have_n:
	movl	%eax, -20(%rbp)			/* n */
	cmpl	$2, %eax
	jl	.Lmain_fail			/* pi[1] must exist; rejects negative/overflowed counts */

	movslq	-20(%rbp), %rdi
	movl	$2, %esi			/* 2 bytes per cell */
	call	calloc@PLT			/* zero-filled buffer */
	testq	%rax, %rax
	je	.Lmain_no_mem
	movq	%rax, -40(%rbp)			/* pi */
	movw	$4, 2(%rax)			/* pi[1] = 4 */

	pxor	%xmm0, %xmm0			/* break false dependency on xmm0 */
	cvtsi2sdl	-20(%rbp), %xmm0
	mulsd	.LC2(%rip), %xmm0		/* n * 13.288 */
	cvttsd2si	%xmm0, %eax		/* truncate to int */
	movl	%eax, -24(%rbp)			/* i */
	jmp	.Lmain_outer_check

.Lmain_outer:
	/* Pass 1: multiply every cell by i, last cell first, carrying in base 10000. */
	movl	$0, -32(%rbp)			/* t = 0 */
	movl	-20(%rbp), %eax
	subl	$1, %eax
	movl	%eax, -28(%rbp)			/* j = n - 1 */
	jmp	.Lmain_mul_check
.Lmain_mul:
	movslq	-28(%rbp), %rax
	movq	-40(%rbp), %rsi
	leaq	(%rsi,%rax,2), %rsi		/* rsi = &pi[j] */
	movzwl	(%rsi), %ecx
	imull	-24(%rbp), %ecx			/* pi[j] * i */
	addl	-32(%rbp), %ecx			/* ecx = t = carry + pi[j] * i */
	movl	$1759218605, %eax		/* ceil(2^44 / 10000) */
	imull	%ecx				/* edx:eax = t * reciprocal */
	sarl	$12, %edx			/* high half >> 12 = product >> 44 */
	movl	%ecx, %eax
	sarl	$31, %eax			/* -1 if t < 0, else 0 */
	subl	%eax, %edx			/* edx = t / 10000, truncating */
	movl	%edx, -32(%rbp)			/* carry = t / 10000 */
	imull	$10000, %edx, %edx
	subl	%edx, %ecx			/* ecx = t % 10000 */
	movw	%cx, (%rsi)			/* pi[j] = t % 10000 */
	subl	$1, -28(%rbp)			/* --j */
.Lmain_mul_check:
	cmpl	$0, -28(%rbp)
	jns	.Lmain_mul			/* while j >= 0 */

	/* Pass 2: divide every cell by 2*i + 1, first cell first, carrying the remainder. */
	movl	$0, -44(%rbp)			/* d.rem = 0 */
	movl	$0, -48(%rbp)			/* d.quot = 0 */
	movl	$0, -28(%rbp)			/* j = 0 */
	jmp	.Lmain_div_check
.Lmain_div:
	movslq	-28(%rbp), %rax
	movq	-40(%rbp), %rdx
	movzwl	(%rdx,%rax,2), %ecx		/* pi[j] */
	imull	$10000, -44(%rbp), %edi		/* d.rem * 10000 */
	addl	%ecx, %edi			/* numerator */
	movl	-24(%rbp), %eax
	leal	1(%rax,%rax), %esi		/* denominator = 2*i + 1 */
	call	div@PLT				/* div_t in rax: quot low, rem high */
	movq	%rax, -48(%rbp)			/* d.quot at -48, d.rem at -44 */
	movslq	-28(%rbp), %rcx
	movq	-40(%rbp), %rdx
	movw	%ax, (%rdx,%rcx,2)		/* pi[j] = low 16 bits of d.quot */
	addl	$1, -28(%rbp)			/* ++j */
.Lmain_div_check:
	movl	-28(%rbp), %eax
	cmpl	-20(%rbp), %eax
	jl	.Lmain_div			/* while j < n */

	movq	-40(%rbp), %rax
	addw	$2, 2(%rax)			/* pi[1] += 2 */
	subl	$1, -24(%rbp)			/* --i */
.Lmain_outer_check:
	cmpl	$0, -24(%rbp)
	jg	.Lmain_outer			/* while i > 0 */

	movl	-20(%rbp), %esi
	movq	-40(%rbp), %rdi
	call	print
	movq	-40(%rbp), %rdi
	call	free@PLT
	xorl	%eax, %eax			/* exit status 0 */
	jmp	.Lmain_ret

.Lmain_no_mem:
	xorl	%edi, %edi			/* perror(NULL): message only, no prefix */
	call	perror@PLT
.Lmain_fail:
	movl	$1, %eax			/* exit status 1 */
.Lmain_ret:
	leave
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE1:
	.size	main, .-main
// Scale factor main() multiplies the cell count by to get its outer
// iteration count.  The two 32-bit words below form the little-endian
// IEEE-754 double 0x402A9374BC6A7EFA, i.e. 13.288.
	.section	.rodata
	.align 8
.LC2:
	.long	3161095930			/* low word,  0xBC6A7EFA */
	.long	1076532084			/* high word, 0x402A9374 */

// Mark the object as not needing an executable stack.  pushsection/popsection
// leaves the current section unchanged for anything that follows.
	.pushsection	.note.GNU-stack,"",@progbits
	.popsection