html/doxygen/polynomial__unrolled_8hpp_source.html

00001
00002 /******************************************************************************
00003 * MODULE     : polynomial_unrolled.hpp
00004 * DESCRIPTION: Loop unrolling for naive polynomial arithmetic
00005 * COPYRIGHT  : (C) 2008  Gregoire Lecerf
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012
00013 #ifndef __MMX__POLYNOMIAL_UNROLLED__HPP
00014 #define __MMX__POLYNOMIAL_UNROLLED__HPP
00015 #include <algebramix/polynomial_naive.hpp>
00016 #include <algebramix/polynomial.hpp>
00017 #include <algebramix/vector_unrolled.hpp>
00018 #include <numerix/simd.hpp>
00019
00020 #define TMPL template<typename C>
00021 #define TMPLK template<typename C, typename K>
00022
00023 namespace mmx {
00024
00025 /******************************************************************************
00026 * Unrolled algorithms for polynomials
00027 ******************************************************************************/
00028
00029 template<typename V, nat sz>
00030 struct polynomial_unrolled: public V {
00031   typedef vector_unrolled<sz> Vec;
00032   typedef typename V::Naive Naive;
00033   typedef polynomial_unrolled<typename V::Positive,sz> Positive;
00034   typedef polynomial_unrolled<typename V::No_simd,sz> No_simd;
00035   typedef polynomial_unrolled<typename V::No_thread,sz> No_thread;
00036   typedef polynomial_unrolled<typename V::No_scaled,sz> No_scaled;
00037 };
00038
00039 template<typename F, typename V, typename W, nat sz>
00040 struct implementation<F,V,polynomial_unrolled<W,sz> >:
00041   public implementation<F,V,W> {};
00042
00043 /******************************************************************************
00044 * Multiplication
00045 ******************************************************************************/
00046
00047 template<typename V, typename C, typename K>
00048 struct polynomial_mul_helper {
00049   typedef implementation<vector_linear,V> Vec;
00050   typedef implementation<polynomial_linear,V> Pol;
00051
00052   static void
00053   op (C* dest, const C* s1, const K* s2, nat n1, nat n2) {
00054     nat l = aligned_size<K,V> (n2);
00055     K* rev_s2 = mmx_new<K> (l);
00056     Pol::reverse (rev_s2, s2, n2);
00057     for (nat i = 1; i <= n2; i++, dest++)
00058       *dest = Vec::inn_prod (s1    , rev_s2 + n2 - i, min (i, n1));
00059     for (nat i = 1; i <  n1; i++, dest++)
00060       *dest = Vec::inn_prod (s1 + i, rev_s2         , min (n2, n1 - i));
00061     mmx_delete (rev_s2, l);
00062   }
00063 };
00064
00065 template<typename V, typename C>
00066 struct polynomial_tmul_helper {
00067   typedef implementation<vector_linear,V> Vec;
00068
00069   static void
00070   op (C* dest, const C* s1, const C* s2, nat n1, nat n2) {
00071     for (nat i = 0; i < n2; i++, dest++, s2++)
00072       *dest = Vec::inn_prod (s1, s2, n1);
00073   }
00074 };
00075
00076 template<typename V, typename W, nat m>
00077 struct implementation<polynomial_multiply,V,polynomial_unrolled<W,m> >:
00078   public implementation<polynomial_linear,V>
00079 {
00080   typedef implementation<vector_linear,V> Vec;
00081   typedef implementation<polynomial_multiply,W> Fallback;
00082
00083 TMPLK static void
00084 mul (C* dest, const C* s1, const K* s2, nat n1, nat n2) {
00085   Vec::clear (dest, n1+n2-1);
00086   polynomial_mul_helper<W,C,K>::op (dest, s1, s2, n1, n2);
00087 }
00088
00089 TMPL static inline void
00090 square (C* dest, const C* s, nat n) {
00091   mul (dest, s, s, n, n);
00092 }
00093
00094 TMPL static void
00095 tmul (C* dest, const C* s1, const C* s2, nat n1, nat n2) {
00096   Vec::clear (dest, n2);
00097   polynomial_tmul_helper<W,C>::op (dest, s1, s2, n1, n2);
00098 }
00099
00100 }; // implementation<polynomial_multiply,V,polynomial_unrolled<W,m> >
00101
00102 /******************************************************************************
00103 * Graeffe transformations
00104 ******************************************************************************/
00105
00106 template<typename V, typename W, nat m>
00107 struct implementation<polynomial_graeffe,V,polynomial_unrolled<W,m> >:
00108   public implementation<polynomial_multiply,V>
00109 {
00110   typedef implementation<vector_linear,V> Vec;
00111   typedef implementation<polynomial_multiply,V> Pol;
00112
00113 TMPL static void
00114 graeffe (C* dest, const C* s, nat n) {
00115   nat l1= (n+1) >> 1, l2= n >> 1;
00116   // More allocation is done to ensure memory alignment
00117   nat l= aligned_size<C,V> (n + 1);
00118   C* c1= mmx_new<C> (l);
00119   C* c2= mmx_new<C> (l);
00120   if (n>0) dest[n-1]= C(0);
00121   Vec::half_copy (c1, s  , l1);
00122   Pol::square (dest, c1, l1);
00123   Vec::half_copy (c1, s+1, l2);
00124   Pol::square (c2 , c1, l2);
00125   Pol::copy (c1 + 1, c2, l2 << 1);
00126   c1[0] = C(0);
00127   Pol::sub (dest, c1, l2 << 1);
00128   mmx_delete<C> (c1, l);
00129   mmx_delete<C> (c2, l);
00130 }
00131
00132 }; // implementation<polynomial_graeffe,V,polynomial_unrolled<W,m> >
00133
00134 /******************************************************************************
00135 * Division
00136 ******************************************************************************/
00137
00138 template<typename V, typename C>
00139 struct polynomial_quo_rem_helper {
00140   typedef implementation<vector_linear,V> Vec;
00141
00142   static void
00143   op (C* dest, C* s1, const C* s2, nat n1, nat n2) {
00144     // (s1, n1) contains the numerator on input and the remainder on output
00145     // (s2, n2) contains the denominator. We assume n2>0 and s2[n2-1] != 0
00146     // (dest, n1-n2+1) contains the quotient
00147     while (n1 >= n2 && n1 != 0) {
00148       int d= n1-n2;
00149       C q= quo (s1[n1-1], s2[n2-1]);
00150       dest[d]= q;
00151       Vec::template vec_binary_scalar<mul_add_op,C,C,C> (s1 + d, s2, -q, n2);
00152       n1--;
00153     }
00154   }
00155 };
00156
00157 template<typename V, typename W, nat m>
00158 struct implementation<polynomial_divide,V,polynomial_unrolled<W,m> >:
00159   public implementation<polynomial_multiply,V>
00160 {
00161
00162 TMPL static void
00163 quo_rem (C* dest, C* s1, const C* s2, nat n1, nat n2) {
00164   polynomial_quo_rem_helper<W,C>::op (dest, s1, s2, n1, n2);
00165 }
00166
00167 }; // implementation<polynomial_divide,V,polynomial_unrolled<W,m> >
00168
00169 /******************************************************************************
00170 * Efficient evaluation of polynomials
00171 ******************************************************************************/
00172
00173 template<typename V, typename C>
00174 struct polynomial_evaluate_helper {
00175   typedef implementation<polynomial_evaluate,polynomial_naive> Pol;
00176
00177   static inline C
00178   op (const C* p, const C& x, nat l) {
00179     return Pol::evaluate (p, x, l);
00180   }
00181 };
00182 #ifdef ALGEBRAMIX_ENABLE_UNSTABLE
00183 template<typename V, typename W, nat m>
00184 struct implementation<polynomial_evaluate,V,polynomial_unrolled<W,m> >:
00185   public implementation<polynomial_divide,V>
00186 {
00187
00188 TMPL static void
00189 annulator (C* d, const C* s, nat n) {
00190   (void) d; (void) s; (void) n;
00191   ERROR ("expand not yet implemented");
00192 }
00193
00194 TMPL static C
00195 evaluate (const C* p, const C& x, nat l) {
00196   return polynomial_evaluate_helper<W,C>::op (p, x, l);
00197 }
00198
00199 TMPL static void
00200 evaluate (C* v, const C* p, const C* x, nat l, nat n) {
00201   // Evaluate the polynomial (p, l) at points x[0], ..., x[n-1]
00202   for (nat j=0; j<n; j++)
00203     v[j]= evaluate (p, x[j], l);
00204 }
00205
00206 TMPL static void
00207 expand (C** v, const C* p, const C* x, const nat* nu, nat n, nat k) {
00208   (void) v; (void) p; (void) x; (void) nu; (void) n; (void) k;
00209   ERROR ("expand not yet implemented");
00210 }
00211
00212 }; // implementation<polynomial_evaluate,V,polynomial_unrolled<W,m> >
00213 #endif
00214 } // namespace mmx
00215
00216 #undef TMPL
00217 #undef TMPLK
00218 #endif //__MMX__POLYNOMIAL_UNROLLED__HPP