/Users/mdipierro/fermiqcd/development/Libraries/fermiqcd_sse.h File Reference


Classes

struct  _sse_float
struct  _sse_vector
struct  _sse_int
struct  _sse_double
struct  _sse_su3
struct  _sse_su3_vector
struct  _sse_spinor

Defines

#define ALIGN16   __attribute__ ((aligned (16)))
#define ALIGN64   __attribute__ ((aligned (64)))
#define _ASM   __asm__ __volatile__
#define _sse_float_prefetch_spinor(addr)
#define _sse_float_prefetch_su3(addr)
#define _sse_float_pair_load(sl, sh)
#define _sse_float_pair_load_up(sl, sh)
#define _sse_float_pair_store(rl, rh)
#define _sse_float_pair_store_up(rl, rh)
#define _sse_float_vector_load(s)
#define _sse_float_vector_load_up(s)
#define _sse_float_vector_store(r)
#define _sse_float_vector_mul(c)
#define _sse_float_vector_add()
#define _sse_float_vector_sub()
#define _sse_float_vector_addsub()
#define _sse_float_su3_multiply(u)
#define _sse_float_su3_inverse_multiply(u)
#define _sse_float_vector_subadd()
#define _sse_float_vector_i_add()
#define _sse_float_vector_i_sub()
#define _sse_float_vector_xch_i_add()
#define _sse_float_vector_xch_i_sub()
#define _sse_float_vector_i_addsub()
#define _sse_float_vector_i_subadd()
#define _sse_float_vector_xch()
#define _sse_double_prefetch_16(addr)
#define _sse_double_prefetch_spinor(addr)
#define _sse_double_prefetch_nta_spinor(addr)
#define _sse_double_prefetch_su3(addr)
#define _sse_double_load(s)
#define _sse_double_load_123(c1, c2, c3)
#define _sse_double_load_up(s)
#define _sse_double_load_up_123(c1, c2, c3)
#define _sse_double_store(r)
#define _sse_double_store_123(c1, c2, c3)
#define _sse_double_store_up(r)
#define _sse_double_store_up_123(c1, c2, c3)
#define _sse_double_vector_mul(c)
#define _sse_double_vector_mul_complex(x, y)
#define _sse_double_vector_add()
#define _sse_double_vector_sub()
#define _sse_double_su3_multiply(u)
#define _sse_double_su3_inverse_multiply(u)
#define _sse_double_vector_i_mul()
#define _sse_double_vector_minus_i_mul()
#define _sse_double_add_norm_square_16(r, c)
#define _sse_double_add_real_scalar_product_16(r, s, c)
#define _sse_double_add_imag_scalar_product_16(r, s, c)
#define _sse_double_hermitian_su3(r, s)
#define _sse_double_copy_16(r, s)
#define _sse_double_add_16(r, s)
#define _sse_double_sub_16(r, s)
#define _sse_double_add_multiply_16(r, c, s)
#define _sse_double_multiply_16(r, c, s)

Detailed Description

Version:
2009-12-21
Author:
Martin Lüscher and Massimo Di Pierro <mdipierro@cs.depaul.edu>

Basic actions for Wilson fermions, optimized in SSE/SSE2 assembler
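
All macros operate on a fixed set of XMM registers: the *_load and *_store variants use xmm0-xmm2, the *_load_up and *_store_up variants use xmm3-xmm5, and every movaps/movapd operand must be 16-byte aligned (hence ALIGN16 on the operand structures). A minimal usage sketch, assuming, as in Lüscher's original SSE macros, that _sse_double_su3_multiply reads xmm0-xmm2 and leaves the product in xmm3-xmm5; the function and variable names below are hypothetical:

#include "fermiqcd_sse.h"

// Hedged sketch: compute w = U*v for one SU(3) matrix and color vector.
// Assumes _sse_su3_vector holds three 16-byte aligned complex doubles
// c1..c3, matching the operands of the load/store macros documented below.
static inline void su3_multiply_vector(_sse_su3_vector &w,
                                       _sse_su3 &U,
                                       _sse_su3_vector &v)
{
  _sse_double_load(v);          // v.c1..c3       -> xmm0..xmm2
  _sse_double_su3_multiply(U);  // U*(xmm0..xmm2) -> xmm3..xmm5 (assumed)
  _sse_double_store_up(w);      // xmm3..xmm5     -> w.c1..c3
}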


Define Documentation

#define _ASM   __asm__ __volatile__
#define _sse_double_add_16(r, s)
#define _sse_double_add_imag_scalar_product_16(r, s, c)
#define _sse_double_add_multiply_16(r, c, s)
#define _sse_double_add_norm_square_16(r, c)
#define _sse_double_add_real_scalar_product_16(r, s, c)
#define _sse_double_copy_16(r, s)
#define _sse_double_hermitian_su3(r, s)
#define _sse_double_load(s)
Value:
_ASM ("movapd %0, %%xmm0 \n\t" \
      "movapd %1, %%xmm1 \n\t" \
      "movapd %2, %%xmm2" \
      : \
      : \
      "m" ((s).c1), \
      "m" ((s).c2), \
      "m" ((s).c3))
#define _sse_double_load_123(c1, c2, c3)
Value:
_ASM ("movapd %0, %%xmm0 \n\t" \
      "movapd %1, %%xmm1 \n\t" \
      "movapd %2, %%xmm2" \
      : \
      : \
      "m" (c1), \
      "m" (c2), \
      "m" (c3))
#define _sse_double_load_up(s)
Value:
_ASM ("movapd %0, %%xmm3 \n\t" \
      "movapd %1, %%xmm4 \n\t" \
      "movapd %2, %%xmm5" \
      : \
      : \
      "m" ((s).c1), \
      "m" ((s).c2), \
      "m" ((s).c3))
#define _sse_double_load_up_123(c1, c2, c3)
Value:
_ASM ("movapd %0, %%xmm3 \n\t" \
      "movapd %1, %%xmm4 \n\t" \
      "movapd %2, %%xmm5" \
      : \
      : \
      "m" (c1), \
      "m" (c2), \
      "m" (c3))
#define _sse_double_multiply_16(r, c, s)
#define _sse_double_prefetch_16(addr)
Value:
_ASM ("prefetcht0 %0" \
      : \
      : "m" (*(addr)))
#define _sse_double_prefetch_nta_spinor(addr)
Value:
_ASM ("prefetchnta %0 \n\t" \
      "prefetchnta %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))
#define _sse_double_prefetch_spinor(addr)
Value:
_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))
#define _sse_double_prefetch_su3(addr)
Value:
_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))
#define _sse_double_store(r)
Value:
_ASM ("movapd %%xmm0, %0 \n\t" \
      "movapd %%xmm1, %1 \n\t" \
      "movapd %%xmm2, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))
#define _sse_double_store_123(c1, c2, c3)
Value:
_ASM ("movapd %%xmm0, %0 \n\t" \
      "movapd %%xmm1, %1 \n\t" \
      "movapd %%xmm2, %2" \
      : \
      "=m" (c1), \
      "=m" (c2), \
      "=m" (c3))
#define _sse_double_store_up(r)
Value:
_ASM ("movapd %%xmm3, %0 \n\t" \
      "movapd %%xmm4, %1 \n\t" \
      "movapd %%xmm5, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))
#define _sse_double_store_up_123(c1, c2, c3)
Value:
_ASM ("movapd %%xmm3, %0 \n\t" \
      "movapd %%xmm4, %1 \n\t" \
      "movapd %%xmm5, %2" \
      : \
      "=m" (c1), \
      "=m" (c2), \
      "=m" (c3))
#define _sse_double_su3_inverse_multiply(u)
#define _sse_double_su3_multiply(u)
#define _sse_double_sub_16(r, s)

#define _sse_double_vector_add()
Value:
_ASM ("addpd %%xmm3, %%xmm0 \n\t" \
      "addpd %%xmm4, %%xmm1 \n\t" \
      "addpd %%xmm5, %%xmm2" \
      : \
      :)
 
#define _sse_double_vector_i_mul()
Value:
_ASM ("shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5 \n\t" \
      "xorpd %0, %%xmm3 \n\t" \
      "xorpd %0, %%xmm4 \n\t" \
      "xorpd %0, %%xmm5" \
      : \
      : \
      "m" (_sse_double_sgn))
 
#define _sse_double_vector_minus_i_mul()
Value:
_ASM ("xorpd %0, %%xmm3 \n\t" \
      "xorpd %0, %%xmm4 \n\t" \
      "xorpd %0, %%xmm5 \n\t" \
      "shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5" \
      : \
      : \
      "m" (_sse_double_sgn))
#define _sse_double_vector_mul(c)
Value:
_ASM ("mulpd %0, %%xmm0 \n\t" \
      "mulpd %0, %%xmm1 \n\t" \
      "mulpd %0, %%xmm2" \
      : \
      : \
      "m" (c))
#define _sse_double_vector_mul_complex(x, y)
Value:
_ASM ("movapd %%xmm0, %%xmm3 \n\t" \
      "movapd %%xmm1, %%xmm4 \n\t" \
      "movapd %%xmm2, %%xmm5 \n\t" \
      "mulpd %1, %%xmm3 \n\t" \
      "mulpd %1, %%xmm4 \n\t" \
      "mulpd %1, %%xmm5 \n\t" \
      "shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5 \n\t" \
      "xorpd %2, %%xmm3 \n\t" \
      "xorpd %2, %%xmm4 \n\t" \
      "xorpd %2, %%xmm5 \n\t" \
      "mulpd %0, %%xmm0 \n\t" \
      "mulpd %0, %%xmm1 \n\t" \
      "mulpd %0, %%xmm2 \n\t" \
      "addpd %%xmm0, %%xmm3 \n\t" \
      "addpd %%xmm1, %%xmm4 \n\t" \
      "addpd %%xmm2, %%xmm5" \
      : \
      : \
      "m" (x), \
      "m" (y), \
      "m" (_sse_double_sgn))
 
#define _sse_double_vector_sub()
Value:
_ASM ("subpd %%xmm3, %%xmm0 \n\t" \
      "subpd %%xmm4, %%xmm1 \n\t" \
      "subpd %%xmm5, %%xmm2" \
      : \
      :)
#define _sse_float_pair_load(sl, sh)
Value:
_ASM ("movlps %0, %%xmm0 \n\t" \
      "movlps %1, %%xmm1 \n\t" \
      "movlps %2, %%xmm2 \n\t" \
      "movhps %3, %%xmm0 \n\t" \
      "movhps %4, %%xmm1 \n\t" \
      "movhps %5, %%xmm2 " \
       : \
       : \
       "m" ((sl).c1), \
       "m" ((sl).c2), \
       "m" ((sl).c3), \
       "m" ((sh).c1), \
       "m" ((sh).c2), \
       "m" ((sh).c3))
#define _sse_float_pair_load_up(sl, sh)
Value:
_ASM ("movlps %0, %%xmm3 \n\t" \
      "movlps %1, %%xmm4 \n\t" \
      "movlps %2, %%xmm5 \n\t" \
      "movhps %3, %%xmm3 \n\t" \
      "movhps %4, %%xmm4 \n\t" \
      "movhps %5, %%xmm5" \
      : \
      : \
      "m" ((sl).c1), \
      "m" ((sl).c2), \
      "m" ((sl).c3), \
      "m" ((sh).c1), \
      "m" ((sh).c2), \
      "m" ((sh).c3))
#define _sse_float_pair_store(rl, rh)
Value:
_ASM ("movlps %%xmm0, %0 \n\t" \
      "movlps %%xmm1, %1 \n\t" \
      "movlps %%xmm2, %2 \n\t" \
      "movhps %%xmm0, %3 \n\t" \
      "movhps %%xmm1, %4 \n\t" \
      "movhps %%xmm2, %5" \
      : \
      "=m" ((rl).c1), \
      "=m" ((rl).c2), \
      "=m" ((rl).c3), \
      "=m" ((rh).c1), \
      "=m" ((rh).c2), \
      "=m" ((rh).c3))
#define _sse_float_pair_store_up(rl, rh)
Value:
_ASM ("movlps %%xmm3, %0 \n\t" \
      "movlps %%xmm4, %1 \n\t" \
      "movlps %%xmm5, %2 \n\t" \
      "movhps %%xmm3, %3 \n\t" \
      "movhps %%xmm4, %4 \n\t" \
      "movhps %%xmm5, %5" \
      : \
      "=m" ((rl).c1), \
      "=m" ((rl).c2), \
      "=m" ((rl).c3), \
      "=m" ((rh).c1), \
      "=m" ((rh).c2), \
      "=m" ((rh).c3))
#define _sse_float_prefetch_spinor(addr)
Value:
_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))
#define _sse_float_prefetch_su3(addr)
Value:
_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))
#define _sse_float_su3_inverse_multiply(u)
#define _sse_float_su3_multiply(u)

#define _sse_float_vector_add()
Value:
_ASM ("addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2 \n\t" \
      : \
      : )
 
#define _sse_float_vector_addsub()
Value:
_ASM ("mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn34))
 
#define _sse_float_vector_i_add()
Value:
_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn13))
 
#define _sse_float_vector_i_addsub()
Value:
_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn14))
 
#define _sse_float_vector_i_sub()
Value:
_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn24))
 
#define _sse_float_vector_i_subadd()
Value:
_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn23))
#define _sse_float_vector_load(s)
Value:
_ASM ("movaps %0, %%xmm0 \n\t" \
      "movaps %1, %%xmm1 \n\t" \
      "movaps %2, %%xmm2" \
      : \
      : \
      "m" ((s).c1), \
      "m" ((s).c2), \
      "m" ((s).c3))
#define _sse_float_vector_load_up(s)
Value:
_ASM ("movaps %0, %%xmm3 \n\t" \
      "movaps %1, %%xmm4 \n\t" \
      "movaps %2, %%xmm5" \
      : \
      : \
      "m" ((s).c1), \
      "m" ((s).c2), \
      "m" ((s).c3))
#define _sse_float_vector_mul(c)
Value:
_ASM ("mulps %0, %%xmm0 \n\t" \
      "mulps %0, %%xmm1 \n\t" \
      "mulps %0, %%xmm2" \
      : \
      : \
      "m" (c))
#define _sse_float_vector_store(r)
Value:
_ASM ("movaps %%xmm0, %0 \n\t" \
      "movaps %%xmm1, %1 \n\t" \
      "movaps %%xmm2, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))
 
#define _sse_float_vector_sub()
Value:
_ASM ("subps %%xmm3, %%xmm0 \n\t" \
      "subps %%xmm4, %%xmm1 \n\t" \
      "subps %%xmm5, %%xmm2" \
      : \
      :)
 
#define _sse_float_vector_subadd()
Value:
_ASM ("mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn12))
 
#define _sse_float_vector_xch()
Value:
_ASM ("shufps $0x4e, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x4e, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x4e, %%xmm5, %%xmm5" \
      : \
      :)
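
shufps $0x4e reorders the lanes as (a, b, c, d) -> (c, d, a, b), exchanging the two packed complex numbers (the sl/sh halves loaded by _sse_float_pair_load). The _xch_i_add and _xch_i_sub macros below fuse this exchange with the real/imaginary swap by using shufps $0x1b, a full lane reversal.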
 
#define _sse_float_vector_xch_i_add()
Value:
_ASM ("shufps $0x1b, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x1b, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x1b, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn13))
 
#define _sse_float_vector_xch_i_sub()
Value:
_ASM ("shufps $0x1b, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x1b, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x1b, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn24))
#define ALIGN16   __attribute__ ((aligned (16)))
#define ALIGN64   __attribute__ ((aligned (64)))
