#ifndef DDM_H #define DDM_H //// /* Double float math */ #if defined(__GNUC__) #define DFM_STRICT_MATH __attribute__((__optimize__("no-associative-math", "no-unsafe-math-optimizations", "no-fast-math"))) #define DFM_ALWAYSINLINE __attribute__((always_inline)) #define DFM_SAFE_MATH (1) #else #define DFM_STRICT_MATH #define DFM_ALWAYSINLINE #define DFM_SAFE_MATH (0) #endif typedef struct { float hi; float lo; } dfmValue; /* Floats have 24 bits of mantissa ; split at (mantissa+1)/2 bits, plus one ; 2^12 + 1 */ #define DFM_SPLIT (4097.0) static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmSetFlt( dfmValue *z, float a ) { z->hi = a; z->lo = 0.0; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmSet( dfmValue *z, float hi, float lo ) { z->hi = hi; z->lo = lo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmNegate( dfmValue *z, dfmValue *a ) { z->hi = -a->hi; z->lo = -a->lo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmAdd( dfmValue *z, dfmValue *a, dfmValue *b ) { float rlo, rhi, ehi, elo, nhi, e, zhi, zlo; rhi = a->hi + b->hi; rlo = a->lo + b->lo; ehi = a->hi - rhi; elo = a->lo - rlo; e = ( ( ehi + b->hi ) + ( a->hi - ( ehi + rhi ) ) ) + rlo; nhi = rhi + e; e = ( ( elo + b->lo ) + ( a->lo - ( elo + rlo ) ) ) + ( e + ( rhi - nhi ) ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmSub( dfmValue *z, dfmValue *a, dfmValue *b ) { float rlo, rhi, ehi, elo, nhi, e, zhi, zlo; rhi = a->hi - b->hi; rlo = a->lo - b->lo; ehi = a->hi - rhi; elo = a->lo - rlo; e = ( ( ehi - b->hi ) + ( a->hi - ( ehi + rhi ) ) ) + rlo; nhi = rhi + e; e = ( ( elo - b->lo ) + ( a->lo - ( elo + rlo ) ) ) + ( e + ( rhi - nhi ) ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmAddFlt( dfmValue *z, dfmValue *a, float b ) { float rhi, ehi, e, zhi, zlo; rhi = a->hi + b; ehi = a->hi - rhi; e = ( ( ehi + b ) + ( a->hi - ( ehi + rhi ) ) ) + a->lo; zhi = e + rhi; zlo = e + ( rhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmSubFlt( dfmValue *z, dfmValue *a, float b ) { float rhi, ehi, e, zhi, zlo; rhi = a->hi - b; ehi = a->hi - rhi; e = ( ( ehi - b ) + ( a->hi - ( ehi + rhi ) ) ) + a->lo; zhi = e + rhi; zlo = e + ( rhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmMul( dfmValue *z, dfmValue *a, dfmValue *b ) { float hx, tx, hy, ty, nhi, rhi, e, zhi, zlo; nhi = DFM_SPLIT * a->hi; rhi = DFM_SPLIT * b->hi; hx = a->hi - nhi; hy = b->hi - rhi; hx += nhi; hy += rhi; tx = a->hi - hx; ty = b->hi - hy; nhi = a->hi * b->hi; e = ( ( ( ( ( hx * hy ) - nhi ) + ( hx * ty ) ) + ( tx * hy ) ) + ( tx * ty ) ) + ( ( a->hi * b->lo ) + ( a->lo * b->hi ) ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmSquare( dfmValue *z, dfmValue *a ) { float hx, tx, nhi, e, zhi, zlo, asum; nhi = DFM_SPLIT * a->hi; hx = a->hi - nhi; hx += nhi; tx = a->hi - hx; nhi = a->hi * a->hi; asum = a->hi * a->lo; e = ( ( ( ( ( hx * hx ) - nhi ) + ( hx * tx ) ) + ( tx * hx ) ) + ( tx * tx ) ) + ( asum + asum ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmMulFlt( dfmValue *z, dfmValue *a, float b ) { float hx, tx, hy, ty, nhi, rhi, e, zhi, zlo; nhi = DFM_SPLIT * a->hi; rhi = DFM_SPLIT * b; hx = a->hi - nhi; hy = b - rhi; hx += nhi; hy += rhi; tx = a->hi - hx; ty = b - hy; nhi = a->hi * b; e = ( ( ( ( ( hx * hy ) - nhi ) + ( hx * ty ) ) + ( tx * hy ) ) + ( tx * ty ) ) + ( a->lo * b ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmMulFltFast( dfmValue *z, dfmValue *a, float b ) { z->hi = a->hi * b; z->lo = a->lo * b; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmMulFast( dfmValue *z, dfmValue *a, dfmValue *b ) { float tx, ty; ty = a->hi * b->hi; tx = a->lo * b->lo; tx += a->hi * b->lo; tx += a->lo * b->hi; z->hi = ty + tx; z->lo = ( ty - z->hi ) + tx; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmSquareFast( dfmValue *z, dfmValue *a ) { float tx, ty, tw; ty = a->hi * a->hi; tx = a->lo * a->lo; tw = a->hi * a->lo; tx += tw + tw; z->hi = ty + tx; z->lo = ( ty - z->hi ) + tx; return; } static inline DFM_ALWAYSINLINE DFM_STRICT_MATH void dfmDiv( dfmValue *z, dfmValue *a, dfmValue *b ) { float hx, tx, hy, ty, nhi, rhi, e, qhi, zhi, zlo; nhi = a->hi / b->hi; rhi = DFM_SPLIT * nhi; qhi = DFM_SPLIT * b->hi; hx = nhi - rhi; hy = b->hi - qhi; hx += rhi; hy += qhi; tx = nhi - hx; ty = b->hi - hy; qhi = nhi * b->hi; e = ( ( ( ( a->hi - qhi ) - ( ( ( ( ( hx * hy ) - qhi ) + ( hx * ty ) ) + ( tx * hy ) ) + ( tx * ty ) ) ) + a->lo ) - ( nhi * b->lo ) ) / b->hi; zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } //// /* Double double math */ #if defined(__GNUC__) #define DDM_STRICT_MATH __attribute__((__optimize__("no-associative-math", "no-unsafe-math-optimizations", "no-fast-math"))) #define DDM_ALWAYSINLINE __attribute__((always_inline)) #define DDM_SAFE_MATH (1) #else #define DDM_STRICT_MATH #define DDM_ALWAYSINLINE #define DDM_SAFE_MATH (0) #endif typedef struct { double hi; double lo; } ddmValue; /* Doubles have 53 bits of mantissa ; split at (mantissa+1)/2 bits, plus one ; 2^27 + 1 */ #define DDM_SPLIT (134217729.0) static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmSetDbl( ddmValue *z, double a ) { z->hi = a; z->lo = 0.0; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmSet( ddmValue *z, double hi, double lo ) { z->hi = hi; z->lo = lo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmNegate( ddmValue *z, ddmValue *a ) { z->hi = -a->hi; z->lo = -a->lo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmAdd( ddmValue *z, ddmValue *a, ddmValue *b ) { double rlo, rhi, ehi, elo, nhi, e, zhi, zlo; rhi = a->hi + b->hi; rlo = a->lo + b->lo; ehi = a->hi - rhi; elo = a->lo - rlo; e = ( ( ehi + b->hi ) + ( a->hi - ( ehi + rhi ) ) ) + rlo; nhi = rhi + e; e = ( ( elo + b->lo ) + ( a->lo - ( elo + rlo ) ) ) + ( e + ( rhi - nhi ) ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmSub( ddmValue *z, ddmValue *a, ddmValue *b ) { double rlo, rhi, ehi, elo, nhi, e, zhi, zlo; rhi = a->hi - b->hi; rlo = a->lo - b->lo; ehi = a->hi - rhi; elo = a->lo - rlo; e = ( ( ehi - b->hi ) + ( a->hi - ( ehi + rhi ) ) ) + rlo; nhi = rhi + e; e = ( ( elo - b->lo ) + ( a->lo - ( elo + rlo ) ) ) + ( e + ( rhi - nhi ) ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmAddDbl( ddmValue *z, ddmValue *a, double b ) { double rhi, ehi, e, zhi, zlo; rhi = a->hi + b; ehi = a->hi - rhi; e = ( ( ehi + b ) + ( a->hi - ( ehi + rhi ) ) ) + a->lo; zhi = e + rhi; zlo = e + ( rhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmSubDbl( ddmValue *z, ddmValue *a, double b ) { double rhi, ehi, e, zhi, zlo; rhi = a->hi - b; ehi = a->hi - rhi; e = ( ( ehi - b ) + ( a->hi - ( ehi + rhi ) ) ) + a->lo; zhi = e + rhi; zlo = e + ( rhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmMul( ddmValue *z, ddmValue *a, ddmValue *b ) { double hx, tx, hy, ty, nhi, rhi, e, zhi, zlo; nhi = DDM_SPLIT * a->hi; rhi = DDM_SPLIT * b->hi; hx = a->hi - nhi; hy = b->hi - rhi; hx += nhi; hy += rhi; tx = a->hi - hx; ty = b->hi - hy; nhi = a->hi * b->hi; e = ( ( ( ( ( hx * hy ) - nhi ) + ( hx * ty ) ) + ( tx * hy ) ) + ( tx * ty ) ) + ( ( a->hi * b->lo ) + ( a->lo * b->hi ) ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmSquare( ddmValue *z, ddmValue *a ) { double hx, tx, nhi, e, zhi, zlo, asum; nhi = DDM_SPLIT * a->hi; hx = a->hi - nhi; hx += nhi; tx = a->hi - hx; nhi = a->hi * a->hi; asum = a->hi * a->lo; e = ( ( ( ( ( hx * hx ) - nhi ) + ( hx * tx ) ) + ( tx * hx ) ) + ( tx * tx ) ) + ( asum + asum ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmMulDbl( ddmValue *z, ddmValue *a, double b ) { double hx, tx, hy, ty, nhi, rhi, e, zhi, zlo; nhi = DDM_SPLIT * a->hi; rhi = DDM_SPLIT * b; hx = a->hi - nhi; hy = b - rhi; hx += nhi; hy += rhi; tx = a->hi - hx; ty = b - hy; nhi = a->hi * b; e = ( ( ( ( ( hx * hy ) - nhi ) + ( hx * ty ) ) + ( tx * hy ) ) + ( tx * ty ) ) + ( a->lo * b ); zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmMulDblFast( ddmValue *z, ddmValue *a, double b ) { z->hi = a->hi * b; z->lo = a->lo * b; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmMulFast( ddmValue *z, ddmValue *a, ddmValue *b ) { double tx, ty; ty = a->hi * b->hi; tx = a->lo * b->lo; tx += a->hi * b->lo; tx += a->lo * b->hi; z->hi = ty + tx; z->lo = ( ty - z->hi ) + tx; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmSquareFast( ddmValue *z, ddmValue *a ) { double tx, ty, tw; ty = a->hi * a->hi; tx = a->lo * a->lo; tw = a->hi * a->lo; tx += tw + tw; z->hi = ty + tx; z->lo = ( ty - z->hi ) + tx; return; } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH void ddmDiv( ddmValue *z, ddmValue *a, ddmValue *b ) { double hx, tx, hy, ty, nhi, rhi, e, qhi, zhi, zlo; nhi = a->hi / b->hi; rhi = DDM_SPLIT * nhi; qhi = DDM_SPLIT * b->hi; hx = nhi - rhi; hy = b->hi - qhi; hx += rhi; hy += qhi; tx = nhi - hx; ty = b->hi - hy; qhi = nhi * b->hi; e = ( ( ( ( a->hi - qhi ) - ( ( ( ( ( hx * hy ) - qhi ) + ( hx * ty ) ) + ( tx * hy ) ) + ( tx * ty ) ) ) + a->lo ) - ( nhi * b->lo ) ) / b->hi; zhi = e + nhi; zlo = e + ( nhi - zhi ); z->hi = zhi; z->lo = zlo; return; } //// #if CPU_SSE3_SUPPORT #define DDM_SSE2_SUPPORT (0) /* Experimental stuff */ static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2SetDbl( double a ) { return _mm_set_pd( a, 0.0 ); } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2Set( double hi, double lo ) { return _mm_set_pd( hi, lo ); } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2Negate( __m128d z ) { return _mm_xor_pd( z, _mm_set_pd( 0x800000000000, 0x800000000000 ) ); } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2Add( __m128d a, __m128d b ) { __m128d z, r, e, ep, rhi, nhi, zhi, zlo, ephi; r = _mm_add_pd( a, b ); e = _mm_sub_pd( a, r ); ep = _mm_add_pd( _mm_add_pd( e, b ), _mm_sub_pd( a, _mm_add_pd( e, r ) ) ); ephi = _mm_unpackhi_pd( ep, ep ); ephi = _mm_add_sd( ephi, r ); rhi = _mm_unpackhi_pd( r, r ); nhi = _mm_add_sd( rhi, ephi ); ep = _mm_add_sd( ep, _mm_add_sd( ephi, _mm_sub_sd( rhi, nhi ) ) ); zhi = _mm_add_sd( ep, nhi ); zlo = _mm_add_sd( ep, _mm_sub_sd( nhi, zhi ) ); return _mm_unpacklo_pd( zlo, zhi ); } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2Sub( __m128d a, __m128d b ) { __m128d z, r, e, ep, rhi, nhi, zhi, zlo, ephi; r = _mm_sub_pd( a, b ); e = _mm_sub_pd( a, r ); ep = _mm_add_pd( _mm_sub_pd( e, b ), _mm_sub_pd( a, _mm_add_pd( e, r ) ) ); ephi = _mm_unpackhi_pd( ep, ep ); ephi = _mm_add_sd( ephi, r ); rhi = _mm_unpackhi_pd( r, r ); nhi = _mm_add_sd( rhi, ephi ); ep = _mm_add_sd( ep, _mm_add_sd( ephi, _mm_sub_sd( rhi, nhi ) ) ); zhi = _mm_add_sd( ep, nhi ); zlo = _mm_add_sd( ep, _mm_sub_sd( nhi, zhi ) ); return _mm_unpacklo_pd( zlo, zhi ); } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2MulDbl( __m128d a, double b ) { return _mm_mul_pd( a, _mm_set1_pd( b ) ); } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2Mul( __m128d a, __m128d b ) { #if 0 __m128d abhi, nrhi, h, t, nhi, ep, zhi, zlo, tw, hw, aw; abhi = _mm_unpackhi_pd( a, b ); nrhi = _mm_mul_pd( _mm_set1_pd( DDM_SPLIT ), abhi ); h = _mm_sub_pd( abhi, nrhi ); h = _mm_add_pd( h, nrhi ); t = _mm_sub_pd( abhi, h ); nhi = _mm_mul_pd( a, b ); nhi = _mm_unpackhi_pd( nhi, nhi ); tw = _mm_shuffle_pd( t, t, 0x1 ); hw = _mm_shuffle_pd( h, h, 0x1 ); aw = _mm_shuffle_pd( a, a, 0x1 ); ep = _mm_sub_sd( _mm_mul_sd( h, hw ), nhi ); hw = _mm_hadd_pd( _mm_mul_pd( hw, t ), _mm_mul_pd( aw, b ) ); ep = _mm_add_sd( _mm_add_sd( _mm_add_sd( ep, hw ), _mm_mul_sd( tw, t ) ), _mm_unpackhi_pd( hw, hw ) ); zhi = _mm_add_sd( ep, nhi ); zlo = _mm_add_sd( ep, _mm_sub_sd( nhi, zhi ) ); return _mm_unpacklo_pd( zlo, zhi ); #else double ahi, alo, bhi, blo; double hx, tx, hy, ty, nhi, rhi, e, zhi, zlo; alo = _mm_cvtsd_f64( a ); ahi = _mm_cvtsd_f64( _mm_unpackhi_pd( a, a ) ); blo = _mm_cvtsd_f64( b ); bhi = _mm_cvtsd_f64( _mm_unpackhi_pd( b, b ) ); nhi = DDM_SPLIT * ahi; rhi = DDM_SPLIT * bhi; hx = ahi - nhi; hy = bhi - rhi; hx += nhi; hy += rhi; tx = ahi - hx; ty = bhi - hy; nhi = ahi * bhi; e = ( ( ( ( ( hx * hy ) - nhi ) + ( hx * ty ) ) + ( tx * hy ) ) + ( tx * ty ) ) + ( ( ahi * blo ) + ( alo * bhi ) ); zhi = e + nhi; zlo = e + ( nhi - zhi ); return _mm_set_pd( zhi, zlo ); #endif } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2MulFast( __m128d a, __m128d b ) { __m128d t, aw, zhi, zlo, thi; aw = _mm_shuffle_pd( a, a, 0x1 ); t = _mm_mul_pd( a, b ); aw = _mm_mul_pd( aw, b ); t = _mm_add_sd( t, _mm_hadd_pd( aw, aw ) ); thi = _mm_unpackhi_pd( t, t ); zhi = _mm_add_sd( t, thi ); zlo = _mm_add_sd( _mm_sub_sd( thi, zhi ), t ); return _mm_unpacklo_pd( zlo, zhi ); } static inline DDM_ALWAYSINLINE DDM_STRICT_MATH __m128d ddmSSE2Div( __m128d a, __m128d b ) { double ahi, alo, bhi, blo; double hx, tx, hy, ty, nhi, rhi, e, qhi, zhi, zlo; alo = _mm_cvtsd_f64( a ); ahi = _mm_cvtsd_f64( _mm_unpackhi_pd( a, a ) ); blo = _mm_cvtsd_f64( b ); bhi = _mm_cvtsd_f64( _mm_unpackhi_pd( b, b ) ); nhi = ahi / bhi; rhi = DDM_SPLIT * nhi; qhi = DDM_SPLIT * bhi; hx = nhi - rhi; hy = bhi - qhi; hx += rhi; hy += qhi; tx = nhi - hx; ty = bhi - hy; qhi = nhi * bhi; e = ( ( ( ( ahi - qhi ) - ( ( ( ( ( hx * hy ) - qhi ) + ( hx * ty ) ) + ( tx * hy ) ) + ( tx * ty ) ) ) + alo ) - ( nhi * blo ) ) / bhi; zhi = e + nhi; zlo = e + ( nhi - zhi ); return _mm_set_pd( zhi, zlo ); } #endif //// #endif