algebramix_doc 0.3
/Users/mourrain/Devel/mmx/algebramix/include/algebramix/polynomial_simd.hpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : polynomial_simd.hpp
00004 * DESCRIPTION: Simd support for polynomials
00005 * COPYRIGHT  : (C) 2008  Gregoire Lecerf
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012 
00013 #ifndef __MMX__POLYNOMIAL_SIMD__HPP
00014 #define __MMX__POLYNOMIAL_SIMD__HPP
00015 #include <numerix/simd.hpp>
00016 #ifdef NUMERIX_ENABLE_SIMD
00017 #include <algebramix/polynomial_unrolled.hpp>
00018 
00019 namespace mmx {
00020 
00021 /******************************************************************************
00022 * Multiplication
00023 ******************************************************************************/
00024 
00025 #ifdef __SSE2__
00026 
00027 template<typename V, typename C, typename K>
00028 struct polynomial_mul_simd_helper {
00029   typedef implementation<polynomial_linear,V> Pol;
00030   static const nat m = Simd_size (C);
00031 
00032   static void
00033   op (C* dest, const C* s1, const K* s2, nat n1, nat n2) {
00034     if (n1 < n2) {
00035       op (dest, s2, s1, n2, n1);
00036       return;
00037     }
00038     nat l = aligned_size<K,V> (n2+m);
00039     K* rev_s2 = mmx_new<K> (l);
00040     for (nat j = 0; j < m; j++) {
00041       nat i = 1 + ((n2 + m - j - 1) % m);
00042       K* d = dest + i - 1;
00043       Pol::reverse (rev_s2 + j, s2, n2);
00044       if (j > 0) rev_s2[j-1] = (C) 0;
00045       for (; i <= n2; i += m, d += m)
00046         *d = Pol::Vec::inn_prod (s1, rev_s2 + j + n2 - i, i);
00047       for (i = 1 + ((m+j-1) % m); i < n1; i += m, d += m)
00048         *d = Pol::Vec::inn_prod (s1 + i - j, rev_s2, min (n2, n1 - i) + j);
00049     }
00050     mmx_delete<K> (rev_s2, l);
00051   }
00052 };
00053 
00054 #define POLYNOMIAL_MUL_IMPL(C)                                          \
00055   template<typename V, typename K>                                      \
00056   struct polynomial_mul_helper<V,C,K> {                                 \
00057     static inline void                                                  \
00058     op (C* dest, const C* s1, const C* s2, nat n1, nat n2) {            \
00059       polynomial_mul_simd_helper<V,C,K>::op (dest, s1, s2, n1, n2); }   \
00060   };
00061 
00062 template<typename V, typename C>
00063 struct polynomial_tmul_simd_helper {
00064   typedef implementation<polynomial_linear,V> Pol;
00065   static const nat m = Simd_size (C);
00066   static void
00067   op (C* dest, const C* s1, const C* s2, nat n1, nat n2) {
00068     nat l = aligned_size<C,V> (n1+m);
00069     C* aligned_s1 = mmx_new<C> (l);
00070     for (nat j = 0; j < m; j++) {
00071       C* d = dest + j;
00072       const C* t2 = s2;
00073       Pol::copy (aligned_s1 + j, s1, n1);
00074       if (j > 0) aligned_s1[j-1] = (C) 0;
00075       for (nat i = j; i < n2; i+=m, d+=m, t2+=m)
00076         *d = Pol::Vec::inn_prod (aligned_s1, t2, n1+j); 
00077     }
00078     mmx_delete<C> (aligned_s1, l);
00079   } 
00080 };
00081 
00082 #define POLYNOMIAL_TMUL_IMPL(C)                                         \
00083   template<typename V>                                                  \
00084   struct polynomial_tmul_helper<V,C> {                                  \
00085     static inline void                                                  \
00086     op (C* dest, const C* s1, const C* s2, nat n1, nat n2) {            \
00087       polynomial_tmul_simd_helper<V,C>::op (dest, s1, s2, n1, n2); }    \
00088   };
00089 
00090 POLYNOMIAL_MUL_IMPL (double);
00091 POLYNOMIAL_TMUL_IMPL (double);
00092 
00093 #undef POLYNOMIAL_MUL_IMPL
00094 #undef POLYNOMIAL_TMUL_IMPL
00095 
00096 /******************************************************************************
00097 * Division
00098 ******************************************************************************/
00099 
00100 template<typename V, typename C>
00101 struct polynomial_quo_rem_simd_helper {
00102   typedef implementation<polynomial_linear,V> Pol;
00103   static const nat m = Simd_size (C);
00104   static void
00105   op (C* dest, C* s1, const C* s2, nat n1, nat n2) {
00106     // (s1, n1) contains the numerator on input and the remainder on output
00107     // (s2, n2) contains the denominator. We assume n2>0 and s2[n2-1] != 0
00108     // (dest, n1-n2+1) contains the quotient
00109     C* aligned_s2 [m];
00110     for (nat j = 1; j < m; j++) {
00111       aligned_s2[j] = mmx_new<C> (aligned_size<C,V> (n2+j));
00112       for (nat k = 0; k < j; k++)
00113         aligned_s2[j][k] = C (0);
00114       Pol::copy (aligned_s2[j] + j, s2, n2);
00115     }
00116     while (n1 >= n2 && n1 != 0) {
00117       int d= n1-n2;
00118       nat j= d % m; 
00119       C q= quo (s1[n1-1], s2[n2-1]);
00120       dest[d]= q;
00121       if (j == 0)
00122         Pol::Vec::template vec_binary_scalar<mul_add_op,C,C,C>
00123           (s1 + d, s2, -q, n2);
00124       else
00125         Pol::Vec::template vec_binary_scalar<mul_add_op,C,C,C>
00126           (s1 + d - j, aligned_s2 [j], -q, n2 + j);
00127       n1--;
00128     }
00129     for (nat j = 1; j < m; j++)
00130       mmx_delete<C> (aligned_s2[j], aligned_size<C,V> (n2+j));
00131   }
00132 };
00133 
00134 #define POLYNOMIAL_QUO_REM_IMPL(C)                                      \
00135   template<typename V>                                                  \
00136   struct polynomial_quo_rem_helper<V,C> {                               \
00137     static inline void                                                  \
00138     op (C* dest, C* s1, const C* s2, nat n1, nat n2) {                  \
00139       polynomial_quo_rem_simd_helper<V,C>::op (dest, s1, s2, n1, n2); } \
00140   };
00141 
00142 POLYNOMIAL_QUO_REM_IMPL (double);
00143 
00144 #undef POLYNOMIAL_QUO_REM_IMPL
00145 
00146 /******************************************************************************
00147 * Efficient evaluation of polynomials
00148 ******************************************************************************/
00149 
00150 template<typename V, typename C>
00151 struct polynomial_evaluate_simd_helper {
00152   static const nat m = Simd_size (C);
00153   typedef typename Simd_type (C) sC;
00154   static inline C
00155   op (const C* p, const C& x, nat l) {
00156     if (l == 0) return 0;
00157     nat k = (l+m-1) / m;
00158     const C* q = p + (k-1) * m;
00159     sC sr = simd_load_aligned (q);
00160     nat i = 1; q -= m;
00161     sC sxm= simd_set_duplicate (binpow (x, m));
00162     for (; i + 4 < k; i += 4, q -= 4*m) {
00163       sr = simd_load_aligned (q)     + sxm * sr;
00164       sr = simd_load_aligned (q-m)   + sxm * sr;
00165       sr = simd_load_aligned (q-2*m) + sxm * sr;
00166       sr = simd_load_aligned (q-3*m) + sxm * sr;
00167     }
00168     for (; i < k; i++, q -= m)
00169       sr = simd_load_aligned (q)     + sxm * sr;
00170     C r = ((C*) &sr) [m-1];
00171     for (i = m-2; i != ((nat) -1); i--)
00172       r = r * x + ((C*) &sr) [i];
00173     return r;
00174   }
00175 };
00176   
00177 #define POLYNOMIAL_EVALUATE_IMPL(C)                                     \
00178   template<typename V>                                                  \
00179   struct polynomial_evaluate_helper<V,C> {                              \
00180     static inline C                                                     \
00181     op (const C* p, const C& x, nat l) {                                \
00182       return polynomial_evaluate_simd_helper<V,C>::op (p, x, l); }      \
00183   };
00184 
00185 POLYNOMIAL_EVALUATE_IMPL (double);
00186 
00187 #undef POLYNOMIAL_EVALUATE_IMPL
00188 
00189 #endif // __SSE2__
00190 
00191 } // namespace mmx
00192 
00193 #endif // NUMERIX_ENABLE_SIMD
00194 #endif //__MMX__POLYNOMIAL_SIMD__HPP
 All Classes Namespaces Files Functions Variables Typedefs Friends Defines