Happy π approximation day/night (in assembly) !


//   pi_x64.s - calculates Pi using the Leibniz formula.
//              Each iteration prints a closer approximation to 50 digits.
//              This is not an optimal implementation and it runs forever.
//
//  x86-64/SSE3 with for Linux, Intel, gnu assembler, gcc
//
//  assemble: as pi_x64.s -o pi_x64.o
//  link:     gcc -o pi_x64 pi_x64.o
//  run:      ./pi_x64
//  output: 3.14159264858204423376264458056539297103881835937500
//          3.14159265108366625440794450696557760238647460937500
//          3.14159265191852199450295302085578441619873046875000
//          3.14159265233600137889879988506436347961425781250000
//          .... and on forever ...
.section .data
        .align 16
denom:  .double  1.0, 3.0
numer:  .double  4.0, -4.0
add4:   .double  4.0, 4.0
zero:   .double  0.0, 0.0
msg:    .string  "%1.50fn"
.section .text
        .globl main
        .type main, @function
        .align 64
main:
        pushq   %rbp
        movq    %rsp, %rbp
        movdqa  (numer), %xmm2
        movdqa  (denom), %xmm6
        movdqa  (add4), %xmm3
        movdqa  %xmm2, %xmm4
        movdqa  (zero), %xmm5
        movq    $100000000, %r12
loop:
        divpd  %xmm6, %xmm2
        addpd  %xmm2, %xmm5
        movdqa %xmm4, %xmm2
        addpd  %xmm3, %xmm6
        subq $1, %r12
        jnz loop
        movq   $100000000, %r12
        movdqa %xmm5, %xmm0
        movdqa %xmm6, %xmm1
        haddpd %xmm0, %xmm0
        movq  $1, %rax
        movq $msg, %rdi
        call printf
        movdqa (add4), %xmm3
        jmp loop
        movq $0, %rax
        popq %rbp
        ret