
// pi_spigot.s - calculates Pi using a spigot algorithm
// as an array of n digits in base 10000.
// http://mathworld.wolfram.com/SpigotAlgorithm.html
//
// x86-64 (SSE2 scalar double) for Linux, AT&T syntax, GNU assembler, gcc
// ABI: System V AMD64; position independent (links as PIE or non-PIE).
//
// assemble:    as pi_spigot.s -o pi_spigot.o
// link:        gcc -o pi_spigot pi_spigot.o
// example run: ./pi_spigot 100
// output: 3.14159265358979323846264338327950288419716939937510582097494459230 ...
//         ... 78164062862089986280348253421170679
//
// Data layout: pi[] is an array of n unsigned 16-bit words, each holding one
// base-10000 digit (four decimal digits).  pi[1] is the integer part and
// pi[2] .. pi[n-2] are the fraction words that get printed; pi[0] and
// pi[n-1] are never printed (presumably carry/guard words).
//
// Limits: the word count is clamped to at least MIN_WORDS and rejected above
// MAX_WORDS so that the start value 13.288 * n still fits a signed 32-bit int.

        .equ    BASE, 10000                     # radix of one stored word
        .equ    MIN_WORDS, 3                    # pi[1] must exist; 3 is what 0 digits yields
        .equ    MAX_WORDS, 160000000            # keeps 13.288 * n below 2^31
        .equ    DEFAULT_WORDS, 253              # (1000 + 3) / 4 + 3, i.e. 1000 digits

        .section .rodata
.LC0:
        .string "%d."
.LC1:
        .string "%04d"
.LC3:
        .string "pi_spigot"
.LC4:
        .string "pi_spigot: too many digits requested\n"

        .text

//----------------------------------------------------------------------
// void print(unsigned short *pi, int n)
// Writes pi[1] followed by a dot, then pi[2] .. pi[n-2] as zero padded
// four digit groups, then a newline, all to stdout.
// In:    rdi = pi, esi = n
// Out:   nothing
// Live across calls: rbx = address of next word, r12d = words left
//----------------------------------------------------------------------
        .globl  print
        .type   print, @function
print:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        pushq   %rbx
        pushq   %r12                            # third push keeps rsp 16-byte aligned at calls
        .cfi_offset 3, -24
        .cfi_offset 12, -32

        leaq    4(%rdi), %rbx                   # rbx = &pi[2]
        leal    -1(%rsi), %r12d                 # r12d = n - 1 (exclusive upper index)

        movzwl  2(%rdi), %esi                   # integer part pi[1]
        leaq    .LC0(%rip), %rdi
        xorl    %eax, %eax                      # variadic call: no vector args
        call    printf@PLT

        subl    $2, %r12d                       # words left = (n - 1) - 2
        jle     .Lprint_newline                 # nothing to print unless n - 1 > 2

.Lprint_word:
        movzwl  (%rbx), %esi
        leaq    .LC1(%rip), %rdi
        xorl    %eax, %eax
        call    printf@PLT
        addq    $2, %rbx                        # advance to next word
        subl    $1, %r12d
        jnz     .Lprint_word

.Lprint_newline:
        movl    $10, %edi                       # line feed
        call    putchar@PLT

        popq    %r12
        popq    %rbx
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
        .size   print, .-print

//----------------------------------------------------------------------
// int main(int argc, char **argv)
// argv[1], if present, is the number of decimal digits wanted; without
// it DEFAULT_WORDS words are computed.
// In:    edi = argc, rsi = argv
// Out:   eax = 0 on success, 1 on allocation failure or oversized request
// Live across calls: rbx = pi, r12d = n
// Inside the compute loops (no calls there):
//        ecx = i (outer counter)      r8  = n as 64-bit bound
//        rsi = j (word index)         r9d = carry / running remainder
//        r10d = 2i + 1                r11d = BASE
//----------------------------------------------------------------------
        .globl  main
        .type   main, @function
main:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        pushq   %rbx
        pushq   %r12                            # third push keeps rsp 16-byte aligned at calls
        .cfi_offset 3, -24
        .cfi_offset 12, -32

        // n = argc > 1 ? (atoi(argv[1]) + 3) / 4 + 3 : DEFAULT_WORDS
        movl    $DEFAULT_WORDS, %r12d
        cmpl    $1, %edi
        jle     .Lhave_n
        movq    8(%rsi), %rdi                   # argv[1]
        call    atoi@PLT
        addl    $3, %eax                        # round up to whole words
        leal    3(%rax), %edx                   # bias so sarl truncates toward zero
        testl   %eax, %eax
        cmovs   %edx, %eax
        sarl    $2, %eax                        # / 4 digits per word
        leal    3(%rax), %r12d                  # + 3 extra words
.Lhave_n:
        // A negative request could give n < 2 and make the pi[1] store
        // below land outside the buffer, so clamp to the 0 digit case.
        movl    $MIN_WORDS, %eax
        cmpl    %eax, %r12d
        cmovl   %eax, %r12d
        cmpl    $MAX_WORDS, %r12d
        jg      .Ltoo_big

        // pi = calloc(n, 2): zero filled array of n words
        movslq  %r12d, %rdi
        movl    $2, %esi
        call    calloc@PLT
        testq   %rax, %rax
        je      .Lno_memory
        movq    %rax, %rbx
        movw    $4, 2(%rbx)                     # pi[1] = 4

        // i = (int)(n * 13.288); 13.288 = 4 * 3.322, and 3.322 is about
        // log2(10) - presumably the iteration count per decimal digit.
        pxor    %xmm0, %xmm0                    # break false dependency on xmm0
        cvtsi2sdl %r12d, %xmm0
        mulsd   .LC2(%rip), %xmm0
        cvttsd2si %xmm0, %ecx                   # ecx = i, upper half of rcx zeroed
        movslq  %r12d, %r8                      # r8 = n
        movl    $BASE, %r11d
        jmp     .Louter_test

.Louter:
        // Pass 1, j = n-1 down to 0: pi[j] = pi[j] * i + carry, renormalised
        // to base BASE.  The product is kept in 64 bits (edx:eax) so it
        // cannot overflow for any i accepted above.
        xorl    %r9d, %r9d                      # carry = 0
        leaq    -1(%r8), %rsi                   # j = n - 1 (n >= MIN_WORDS, so j >= 0)
.Lcarry:
        movzwl  (%rbx,%rsi,2), %eax
        mull    %ecx                            # edx:eax = pi[j] * i
        addl    %r9d, %eax
        adcl    $0, %edx                        # + carry from the word to the right
        divl    %r11d                           # eax = t / BASE, edx = t % BASE
        movw    %dx, (%rbx,%rsi,2)              # pi[j] = t % BASE
        movl    %eax, %r9d                      # carry = t / BASE
        subq    $1, %rsi
        jns     .Lcarry                         # loop while j >= 0

        // Pass 2, j = 0 up to n-1: long division of the whole number by
        // 2i + 1, carrying the remainder into the next word.
        leal    1(%rcx,%rcx), %r10d             # divisor = 2i + 1
        xorl    %r9d, %r9d                      # remainder = 0
        xorl    %esi, %esi                      # j = 0
.Ldivide:
        movl    %r9d, %eax
        mull    %r11d                           # edx:eax = remainder * BASE
        movzwl  (%rbx,%rsi,2), %edi
        addl    %edi, %eax
        adcl    $0, %edx                        # + pi[j]
        divl    %r10d                           # eax = quotient, edx = remainder
        movw    %ax, (%rbx,%rsi,2)              # pi[j] = quotient
        movl    %edx, %r9d
        addq    $1, %rsi
        cmpq    %r8, %rsi
        jl      .Ldivide                        # loop while j < n

        addw    $2, 2(%rbx)                     # pi[1] += 2
        subl    $1, %ecx                        # --i
.Louter_test:
        testl   %ecx, %ecx
        jg      .Louter                         # loop while i > 0

        movq    %rbx, %rdi
        movl    %r12d, %esi
        call    print

        movq    %rbx, %rdi
        call    free@PLT
        xorl    %eax, %eax                      # exit status 0

.Lexit:
        .cfi_remember_state
        popq    %r12
        popq    %rbx
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_restore_state

.Ltoo_big:
        movq    stderr@GOTPCREL(%rip), %rax
        movq    (%rax), %rsi                    # FILE *stderr
        leaq    .LC4(%rip), %rdi
        call    fputs@PLT
        movl    $1, %eax
        jmp     .Lexit

.Lno_memory:
        leaq    .LC3(%rip), %rdi
        call    perror@PLT                      # reports the errno left by calloc
        movl    $1, %eax
        jmp     .Lexit
        .cfi_endproc
        .size   main, .-main

        .section .rodata
        .align 8
.LC2:                                           # IEEE-754 double 13.288, low word first
        .long   3161095930
        .long   1076532084

        .section .note.GNU-stack,"",@progbits