algebramix_doc 0.3
|
00001 00002 /****************************************************************************** 00003 * MODULE : polynomial_simd.hpp 00004 * DESCRIPTION: Simd support for polynomials 00005 * COPYRIGHT : (C) 2008 Gregoire Lecerf 00006 ******************************************************************************* 00007 * This software falls under the GNU general public license and comes WITHOUT 00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00009 * If you don't have this file, write to the Free Software Foundation, Inc., 00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00011 ******************************************************************************/ 00012 00013 #ifndef __MMX__POLYNOMIAL_SIMD__HPP 00014 #define __MMX__POLYNOMIAL_SIMD__HPP 00015 #include <numerix/simd.hpp> 00016 #ifdef NUMERIX_ENABLE_SIMD 00017 #include <algebramix/polynomial_unrolled.hpp> 00018 00019 namespace mmx { 00020 00021 /****************************************************************************** 00022 * Multiplication 00023 ******************************************************************************/ 00024 00025 #ifdef __SSE2__ 00026 00027 template<typename V, typename C, typename K> 00028 struct polynomial_mul_simd_helper { 00029 typedef implementation<polynomial_linear,V> Pol; 00030 static const nat m = Simd_size (C); 00031 00032 static void 00033 op (C* dest, const C* s1, const K* s2, nat n1, nat n2) { 00034 if (n1 < n2) { 00035 op (dest, s2, s1, n2, n1); 00036 return; 00037 } 00038 nat l = aligned_size<K,V> (n2+m); 00039 K* rev_s2 = mmx_new<K> (l); 00040 for (nat j = 0; j < m; j++) { 00041 nat i = 1 + ((n2 + m - j - 1) % m); 00042 K* d = dest + i - 1; 00043 Pol::reverse (rev_s2 + j, s2, n2); 00044 if (j > 0) rev_s2[j-1] = (C) 0; 00045 for (; i <= n2; i += m, d += m) 00046 *d = Pol::Vec::inn_prod (s1, rev_s2 + j + n2 - i, i); 00047 for (i = 1 + ((m+j-1) % m); i < n1; i += m, d += m) 00048 *d = Pol::Vec::inn_prod (s1 + i - j, rev_s2, min (n2, n1 - i) + j); 00049 } 00050 mmx_delete<K> (rev_s2, l); 00051 } 00052 }; 00053 00054 #define POLYNOMIAL_MUL_IMPL(C) \ 00055 template<typename V, typename K> \ 00056 struct polynomial_mul_helper<V,C,K> { \ 00057 static inline void \ 00058 op (C* dest, const C* s1, const C* s2, nat n1, nat n2) { \ 00059 polynomial_mul_simd_helper<V,C,K>::op (dest, s1, s2, n1, n2); } \ 00060 }; 00061 00062 template<typename V, typename C> 00063 struct polynomial_tmul_simd_helper { 00064 typedef implementation<polynomial_linear,V> Pol; 00065 static const nat m = Simd_size (C); 00066 static void 00067 op (C* dest, const C* s1, const C* s2, nat n1, nat n2) { 00068 nat l = aligned_size<C,V> (n1+m); 00069 C* aligned_s1 = mmx_new<C> (l); 00070 for (nat j = 0; j < m; j++) { 00071 C* d = dest + j; 00072 const C* t2 = s2; 00073 Pol::copy (aligned_s1 + j, s1, n1); 00074 if (j > 0) aligned_s1[j-1] = (C) 0; 00075 for (nat i = j; i < n2; i+=m, d+=m, t2+=m) 00076 *d = Pol::Vec::inn_prod (aligned_s1, t2, n1+j); 00077 } 00078 mmx_delete<C> (aligned_s1, l); 00079 } 00080 }; 00081 00082 #define POLYNOMIAL_TMUL_IMPL(C) \ 00083 template<typename V> \ 00084 struct polynomial_tmul_helper<V,C> { \ 00085 static inline void \ 00086 op (C* dest, const C* s1, const C* s2, nat n1, nat n2) { \ 00087 polynomial_tmul_simd_helper<V,C>::op (dest, s1, s2, n1, n2); } \ 00088 }; 00089 00090 POLYNOMIAL_MUL_IMPL (double); 00091 POLYNOMIAL_TMUL_IMPL (double); 00092 00093 #undef POLYNOMIAL_MUL_IMPL 00094 #undef POLYNOMIAL_TMUL_IMPL 00095 00096 /****************************************************************************** 00097 * Division 00098 ******************************************************************************/ 00099 00100 template<typename V, typename C> 00101 struct polynomial_quo_rem_simd_helper { 00102 typedef implementation<polynomial_linear,V> Pol; 00103 static const nat m = Simd_size (C); 00104 static void 00105 op (C* dest, C* s1, const C* s2, nat n1, nat n2) { 00106 // (s1, n1) contains the numerator on input and the remainder on output 00107 // (s2, n2) contains the denominator. We assume n2>0 and s2[n2-1] != 0 00108 // (dest, n1-n2+1) contains the quotient 00109 C* aligned_s2 [m]; 00110 for (nat j = 1; j < m; j++) { 00111 aligned_s2[j] = mmx_new<C> (aligned_size<C,V> (n2+j)); 00112 for (nat k = 0; k < j; k++) 00113 aligned_s2[j][k] = C (0); 00114 Pol::copy (aligned_s2[j] + j, s2, n2); 00115 } 00116 while (n1 >= n2 && n1 != 0) { 00117 int d= n1-n2; 00118 nat j= d % m; 00119 C q= quo (s1[n1-1], s2[n2-1]); 00120 dest[d]= q; 00121 if (j == 0) 00122 Pol::Vec::template vec_binary_scalar<mul_add_op,C,C,C> 00123 (s1 + d, s2, -q, n2); 00124 else 00125 Pol::Vec::template vec_binary_scalar<mul_add_op,C,C,C> 00126 (s1 + d - j, aligned_s2 [j], -q, n2 + j); 00127 n1--; 00128 } 00129 for (nat j = 1; j < m; j++) 00130 mmx_delete<C> (aligned_s2[j], aligned_size<C,V> (n2+j)); 00131 } 00132 }; 00133 00134 #define POLYNOMIAL_QUO_REM_IMPL(C) \ 00135 template<typename V> \ 00136 struct polynomial_quo_rem_helper<V,C> { \ 00137 static inline void \ 00138 op (C* dest, C* s1, const C* s2, nat n1, nat n2) { \ 00139 polynomial_quo_rem_simd_helper<V,C>::op (dest, s1, s2, n1, n2); } \ 00140 }; 00141 00142 POLYNOMIAL_QUO_REM_IMPL (double); 00143 00144 #undef POLYNOMIAL_QUO_REM_IMPL 00145 00146 /****************************************************************************** 00147 * Efficient evaluation of polynomials 00148 ******************************************************************************/ 00149 00150 template<typename V, typename C> 00151 struct polynomial_evaluate_simd_helper { 00152 static const nat m = Simd_size (C); 00153 typedef typename Simd_type (C) sC; 00154 static inline C 00155 op (const C* p, const C& x, nat l) { 00156 if (l == 0) return 0; 00157 nat k = (l+m-1) / m; 00158 const C* q = p + (k-1) * m; 00159 sC sr = simd_load_aligned (q); 00160 nat i = 1; q -= m; 00161 sC sxm= simd_set_duplicate (binpow (x, m)); 00162 for (; i + 4 < k; i += 4, q -= 4*m) { 00163 sr = simd_load_aligned (q) + sxm * sr; 00164 sr = simd_load_aligned (q-m) + sxm * sr; 00165 sr = simd_load_aligned (q-2*m) + sxm * sr; 00166 sr = simd_load_aligned (q-3*m) + sxm * sr; 00167 } 00168 for (; i < k; i++, q -= m) 00169 sr = simd_load_aligned (q) + sxm * sr; 00170 C r = ((C*) &sr) [m-1]; 00171 for (i = m-2; i != ((nat) -1); i--) 00172 r = r * x + ((C*) &sr) [i]; 00173 return r; 00174 } 00175 }; 00176 00177 #define POLYNOMIAL_EVALUATE_IMPL(C) \ 00178 template<typename V> \ 00179 struct polynomial_evaluate_helper<V,C> { \ 00180 static inline C \ 00181 op (const C* p, const C& x, nat l) { \ 00182 return polynomial_evaluate_simd_helper<V,C>::op (p, x, l); } \ 00183 }; 00184 00185 POLYNOMIAL_EVALUATE_IMPL (double); 00186 00187 #undef POLYNOMIAL_EVALUATE_IMPL 00188 00189 #endif // __SSE2__ 00190 00191 } // namespace mmx 00192 00193 #endif // NUMERIX_ENABLE_SIMD 00194 #endif //__MMX__POLYNOMIAL_SIMD__HPP