algebramix_doc 0.3
|
00001 00002 /****************************************************************************** 00003 * MODULE : polynomial_unrolled.hpp 00004 * DESCRIPTION: Loop unrolling for naive polynomial arithmetic 00005 * COPYRIGHT : (C) 2008 Gregoire Lecerf 00006 ******************************************************************************* 00007 * This software falls under the GNU general public license and comes WITHOUT 00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00009 * If you don't have this file, write to the Free Software Foundation, Inc., 00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00011 ******************************************************************************/ 00012 00013 #ifndef __MMX__POLYNOMIAL_UNROLLED__HPP 00014 #define __MMX__POLYNOMIAL_UNROLLED__HPP 00015 #include <algebramix/polynomial_naive.hpp> 00016 #include <algebramix/polynomial.hpp> 00017 #include <algebramix/vector_unrolled.hpp> 00018 #include <numerix/simd.hpp> 00019 00020 #define TMPL template<typename C> 00021 #define TMPLK template<typename C, typename K> 00022 00023 namespace mmx { 00024 00025 /****************************************************************************** 00026 * Unrolled algorithms for polynomials 00027 ******************************************************************************/ 00028 00029 template<typename V, nat sz> 00030 struct polynomial_unrolled: public V { 00031 typedef vector_unrolled<sz> Vec; 00032 typedef typename V::Naive Naive; 00033 typedef polynomial_unrolled<typename V::Positive,sz> Positive; 00034 typedef polynomial_unrolled<typename V::No_simd,sz> No_simd; 00035 typedef polynomial_unrolled<typename V::No_thread,sz> No_thread; 00036 typedef polynomial_unrolled<typename V::No_scaled,sz> No_scaled; 00037 }; 00038 00039 template<typename F, typename V, typename W, nat sz> 00040 struct implementation<F,V,polynomial_unrolled<W,sz> >: 00041 public implementation<F,V,W> {}; 00042 00043 /****************************************************************************** 00044 * Multiplication 00045 ******************************************************************************/ 00046 00047 template<typename V, typename C, typename K> 00048 struct polynomial_mul_helper { 00049 typedef implementation<vector_linear,V> Vec; 00050 typedef implementation<polynomial_linear,V> Pol; 00051 00052 static void 00053 op (C* dest, const C* s1, const K* s2, nat n1, nat n2) { 00054 nat l = aligned_size<K,V> (n2); 00055 K* rev_s2 = mmx_new<K> (l); 00056 Pol::reverse (rev_s2, s2, n2); 00057 for (nat i = 1; i <= n2; i++, dest++) 00058 *dest = Vec::inn_prod (s1 , rev_s2 + n2 - i, min (i, n1)); 00059 for (nat i = 1; i < n1; i++, dest++) 00060 *dest = Vec::inn_prod (s1 + i, rev_s2 , min (n2, n1 - i)); 00061 mmx_delete (rev_s2, l); 00062 } 00063 }; 00064 00065 template<typename V, typename C> 00066 struct polynomial_tmul_helper { 00067 typedef implementation<vector_linear,V> Vec; 00068 00069 static void 00070 op (C* dest, const C* s1, const C* s2, nat n1, nat n2) { 00071 for (nat i = 0; i < n2; i++, dest++, s2++) 00072 *dest = Vec::inn_prod (s1, s2, n1); 00073 } 00074 }; 00075 00076 template<typename V, typename W, nat m> 00077 struct implementation<polynomial_multiply,V,polynomial_unrolled<W,m> >: 00078 public implementation<polynomial_linear,V> 00079 { 00080 typedef implementation<vector_linear,V> Vec; 00081 typedef implementation<polynomial_multiply,W> Fallback; 00082 00083 TMPLK static void 00084 mul (C* dest, const C* s1, const K* s2, nat n1, nat n2) { 00085 Vec::clear (dest, n1+n2-1); 00086 polynomial_mul_helper<W,C,K>::op (dest, s1, s2, n1, n2); 00087 } 00088 00089 TMPL static inline void 00090 square (C* dest, const C* s, nat n) { 00091 mul (dest, s, s, n, n); 00092 } 00093 00094 TMPL static void 00095 tmul (C* dest, const C* s1, const C* s2, nat n1, nat n2) { 00096 Vec::clear (dest, n2); 00097 polynomial_tmul_helper<W,C>::op (dest, s1, s2, n1, n2); 00098 } 00099 00100 }; // implementation<polynomial_multiply,V,polynomial_unrolled<W,m> > 00101 00102 /****************************************************************************** 00103 * Graeffe transformations 00104 ******************************************************************************/ 00105 00106 template<typename V, typename W, nat m> 00107 struct implementation<polynomial_graeffe,V,polynomial_unrolled<W,m> >: 00108 public implementation<polynomial_multiply,V> 00109 { 00110 typedef implementation<vector_linear,V> Vec; 00111 typedef implementation<polynomial_multiply,V> Pol; 00112 00113 TMPL static void 00114 graeffe (C* dest, const C* s, nat n) { 00115 nat l1= (n+1) >> 1, l2= n >> 1; 00116 // More allocation is done to ensure memory alignment 00117 nat l= aligned_size<C,V> (n + 1); 00118 C* c1= mmx_new<C> (l); 00119 C* c2= mmx_new<C> (l); 00120 if (n>0) dest[n-1]= C(0); 00121 Vec::half_copy (c1, s , l1); 00122 Pol::square (dest, c1, l1); 00123 Vec::half_copy (c1, s+1, l2); 00124 Pol::square (c2 , c1, l2); 00125 Pol::copy (c1 + 1, c2, l2 << 1); 00126 c1[0] = C(0); 00127 Pol::sub (dest, c1, l2 << 1); 00128 mmx_delete<C> (c1, l); 00129 mmx_delete<C> (c2, l); 00130 } 00131 00132 }; // implementation<polynomial_graeffe,V,polynomial_unrolled<W,m> > 00133 00134 /****************************************************************************** 00135 * Division 00136 ******************************************************************************/ 00137 00138 template<typename V, typename C> 00139 struct polynomial_quo_rem_helper { 00140 typedef implementation<vector_linear,V> Vec; 00141 00142 static void 00143 op (C* dest, C* s1, const C* s2, nat n1, nat n2) { 00144 // (s1, n1) contains the numerator on input and the remainder on output 00145 // (s2, n2) contains the denominator. We assume n2>0 and s2[n2-1] != 0 00146 // (dest, n1-n2+1) contains the quotient 00147 while (n1 >= n2 && n1 != 0) { 00148 int d= n1-n2; 00149 C q= quo (s1[n1-1], s2[n2-1]); 00150 dest[d]= q; 00151 Vec::template vec_binary_scalar<mul_add_op,C,C,C> (s1 + d, s2, -q, n2); 00152 n1--; 00153 } 00154 } 00155 }; 00156 00157 template<typename V, typename W, nat m> 00158 struct implementation<polynomial_divide,V,polynomial_unrolled<W,m> >: 00159 public implementation<polynomial_multiply,V> 00160 { 00161 00162 TMPL static void 00163 quo_rem (C* dest, C* s1, const C* s2, nat n1, nat n2) { 00164 polynomial_quo_rem_helper<W,C>::op (dest, s1, s2, n1, n2); 00165 } 00166 00167 }; // implementation<polynomial_divide,V,polynomial_unrolled<W,m> > 00168 00169 /****************************************************************************** 00170 * Efficient evaluation of polynomials 00171 ******************************************************************************/ 00172 00173 template<typename V, typename C> 00174 struct polynomial_evaluate_helper { 00175 typedef implementation<polynomial_evaluate,polynomial_naive> Pol; 00176 00177 static inline C 00178 op (const C* p, const C& x, nat l) { 00179 return Pol::evaluate (p, x, l); 00180 } 00181 }; 00182 #ifdef ALGEBRAMIX_ENABLE_UNSTABLE 00183 template<typename V, typename W, nat m> 00184 struct implementation<polynomial_evaluate,V,polynomial_unrolled<W,m> >: 00185 public implementation<polynomial_divide,V> 00186 { 00187 00188 TMPL static void 00189 annulator (C* d, const C* s, nat n) { 00190 (void) d; (void) s; (void) n; 00191 ERROR ("expand not yet implemented"); 00192 } 00193 00194 TMPL static C 00195 evaluate (const C* p, const C& x, nat l) { 00196 return polynomial_evaluate_helper<W,C>::op (p, x, l); 00197 } 00198 00199 TMPL static void 00200 evaluate (C* v, const C* p, const C* x, nat l, nat n) { 00201 // Evaluate the polynomial (p, l) at points x[0], ..., x[n-1] 00202 for (nat j=0; j<n; j++) 00203 v[j]= evaluate (p, x[j], l); 00204 } 00205 00206 TMPL static void 00207 expand (C** v, const C* p, const C* x, const nat* nu, nat n, nat k) { 00208 (void) v; (void) p; (void) x; (void) nu; (void) n; (void) k; 00209 ERROR ("expand not yet implemented"); 00210 } 00211 00212 }; // implementation<polynomial_evaluate,V,polynomial_unrolled<W,m> > 00213 #endif 00214 } // namespace mmx 00215 00216 #undef TMPL 00217 #undef TMPLK 00218 #endif //__MMX__POLYNOMIAL_UNROLLED__HPP