algebramix_doc 0.3
|
00001 00002 /****************************************************************************** 00003 * MODULE : polynomial_tft.hpp 00004 * DESCRIPTION: FFT based algorithms 00005 * COPYRIGHT : (C) 2008 Gregoire Lecerf 00006 ******************************************************************************* 00007 * This software falls under the GNU general public license and comes WITHOUT 00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00009 * If you don't have this file, write to the Free Software Foundation, Inc., 00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00011 ******************************************************************************/ 00012 00013 #ifndef __MMX__POLYNOMIAL_TFT__HPP 00014 #define __MMX__POLYNOMIAL_TFT__HPP 00015 #include <algebramix/polynomial_dicho.hpp> 00016 #include <algebramix/polynomial_ring_dicho.hpp> 00017 #include <algebramix/fft_blocks.hpp> 00018 #include <algebramix/fft_simd.hpp> 00019 #include <algebramix/fft_truncated.hpp> 00020 00021 namespace mmx { 00022 #define TMPL template<typename C> 00023 00024 /****************************************************************************** 00025 * TFT based variant polynomials 00026 ******************************************************************************/ 00027 00028 struct tft_threshold {}; 00029 template<typename V> struct polynomial_balanced_tft; 00030 00031 template<typename C> 00032 struct threshold_helper<C,tft_threshold> { 00033 typedef fixed_value<nat,128> impl; 00034 }; 00035 00036 template<typename C, typename V> 00037 struct threshold_helper<polynomial<C,V>,tft_threshold> { 00038 typedef fixed_value<nat,32> impl; 00039 }; 00040 00041 template<typename C, typename U, typename V, typename W> 00042 struct threshold_helper<modular<modulus<polynomial<C,U>,V>,W>, 00043 tft_threshold> { 00044 typedef fixed_value<nat,32> impl; 00045 }; 00046 00047 template<typename V, typename T= tft_threshold> 00048 struct polynomial_tft_inc: public V { 00049 typedef typename V::Vec Vec; 00050 typedef typename V::Naive Naive; 00051 typedef typename V::Positive Positive; 00052 typedef polynomial_tft_inc<typename V::No_simd,T> No_simd; 00053 typedef polynomial_tft_inc<typename V::No_thread,T> No_thread; 00054 typedef polynomial_tft_inc<typename V::No_scaled,T> No_scaled; 00055 }; 00056 00057 template<typename F, typename V, typename W, typename Th> 00058 struct implementation<F,V,polynomial_tft_inc<W,Th> >: 00059 public implementation<F,V,W> {}; 00060 00061 DEFINE_VARIANT_1 (typename V, V, polynomial_tft, 00062 polynomial_dicho< 00063 polynomial_balanced_tft< 00064 polynomial_tft_inc< 00065 polynomial_karatsuba<V> > > >) 00066 00067 STMPL 00068 struct polynomial_variant_helper<modular<modulus<nat>, 00069 modular_fixed<nat,0x0c0000001> > > { 00070 typedef polynomial_tft<polynomial_naive> PV; 00071 }; 00072 00073 template<typename C, typename V> 00074 struct polynomial_variant_helper<polynomial<C,polynomial_tft<V> > > { 00075 typedef polynomial_gcd_ring_dicho<polynomial_tft<V> > PV; 00076 }; 00077 00078 /****************************************************************************** 00079 * Multiplication 00080 ******************************************************************************/ 00081 00082 template<typename V, typename W, typename Th> 00083 struct implementation<polynomial_multiply,V,polynomial_tft_inc<W,Th> >: 00084 public implementation<polynomial_linear,V> 00085 { 00086 typedef implementation<vector_linear,V> Vec; 00087 typedef implementation<polynomial_linear,V> Pol; 00088 typedef implementation<polynomial_multiply,W> Fallback; 00089 00090 TMPL static inline void 00091 mul (C* dest, const C* s1, const C* s2, nat n1, nat n2) { 00092 typedef fft_blocks_transformer<C, fft_simd_transformer<C>, 00093 8, 5, 10, 16> FFTer; 00094 typedef fft_truncated_transformer<C,FFTer> Tfter; 00095 //typedef fft_truncated_transformer<C> Tfter; 00096 00097 if (n1 < Threshold(C,Th) || n2 < Threshold(C,Th)) 00098 Fallback::mul (dest, s1, s2, n1, n2); 00099 else { 00100 nat n= n1 + n2 - 1; 00101 nat m= next_power_of_two (n); 00102 nat l= aligned_size<C,V> (m); 00103 C* temp0= mmx_new<C> (l); 00104 C* temp1= mmx_new<C> (l); 00105 C* tempd= mmx_new<C> (l); 00106 Tfter tfter (n); 00107 Vec::copy (temp0, s1, n1); 00108 Vec::clear (temp0+n1, n-n1); 00109 Vec::copy (temp1, s2, n2); 00110 Vec::clear (temp1+n2, n-n2); 00111 tfter.direct_transform (temp0); 00112 tfter.direct_transform (temp1); 00113 Vec::mul (tempd, temp0, temp1, n); 00114 tfter.inverse_transform (tempd); 00115 Vec::copy (dest, tempd, n); 00116 mmx_delete<C> (temp0, l); 00117 mmx_delete<C> (temp1, l); 00118 mmx_delete<C> (tempd, l); 00119 } 00120 } 00121 00122 TMPL static inline void 00123 square (C* dest, const C* s1, nat n1) { 00124 typedef fft_truncated_transformer<C> Tfter; 00125 if (n1 < Threshold(C,Th)) 00126 Fallback::square (dest, s1, n1); 00127 else { 00128 nat n= 2 * n1 - 1; 00129 nat m= next_power_of_two (n); 00130 nat l= aligned_size<C,V> (m); 00131 C* temp0= mmx_new<C> (l); 00132 C* tempd= mmx_new<C> (l); 00133 Tfter tfter (n); 00134 Vec::copy (temp0, s1, n1); 00135 Vec::clear (temp0+n1, n-n1); 00136 tfter.direct_transform (temp0); 00137 Vec::mul (tempd, temp0, temp0, n); 00138 tfter.inverse_transform (tempd); 00139 Vec::copy (dest, tempd, n); 00140 mmx_delete<C> (temp0, l); 00141 mmx_delete<C> (tempd, l); 00142 } 00143 } 00144 00145 }; // implementation<polynomial_multiply,V,polynomial_tft_inc<W,Th> > 00146 00147 /****************************************************************************** 00148 * Degree balancing 00149 ******************************************************************************/ 00150 00151 template<typename V> 00152 struct polynomial_balanced_tft: public V { 00153 typedef typename V::Vec Vec; 00154 typedef typename V::Naive Naive; 00155 typedef polynomial_balanced_tft<typename V::Positive> Positive; 00156 typedef polynomial_balanced_tft<typename V::No_simd> No_simd; 00157 typedef polynomial_balanced_tft<typename V::No_thread> No_thread; 00158 typedef polynomial_balanced_tft<typename V::No_scaled> No_scaled; 00159 }; 00160 00161 template<typename F, typename V, typename W> 00162 struct implementation<F,V,polynomial_balanced_tft<W> >: 00163 public implementation<F,V,W> {}; 00164 00165 template<typename V, typename W> 00166 struct implementation<polynomial_multiply,V,polynomial_balanced_tft<W> >: 00167 public implementation<polynomial_linear,V> 00168 { 00169 typedef implementation<vector_linear,V> Vec; 00170 typedef implementation<polynomial_linear,W> Pol; 00171 typedef implementation<polynomial_multiply,W> Fallback; 00172 00173 TMPL static inline void 00174 mul (C* dest, const C* s1, const C* s2, nat n1, nat n2) { 00175 if (n1 == 0 && n2 == 0) 00176 return; 00177 if (n1 == 0 || n2 == 0) { 00178 Pol::clear (dest, n1+n2-1); 00179 return; 00180 } 00181 if (n2 < n1) { 00182 mul (dest, s2, s1, n2, n1); 00183 return; 00184 } 00185 nat n= n1+n2-1; 00186 nat m= next_power_of_two (2*n1 - 1); 00187 if (n1 <= n2 && n >= m) { 00188 Pol::clear (dest, n); 00189 nat l= aligned_size<C,V> (m); 00190 C* temp= mmx_new<C> (l); 00191 while (n1+n2-1 >= m) { 00192 Fallback::mul (temp, s1, s2, n1, m-n1+1); 00193 Vec::add (dest, temp, m); 00194 n2 -= m-n1+1; s2 += m-n1+1; dest += m-n1+1; 00195 } 00196 if (n2 > 0) { 00197 mul (temp, s1, s2, n1, n2); 00198 Vec::add (dest, temp, n1+n2-1); 00199 } 00200 mmx_delete<C> (temp, l); 00201 return; 00202 } 00203 Fallback::mul (dest, s1, s2, n1, n2); 00204 } 00205 00206 TMPL static inline void 00207 square (C* dest, const C* s1, nat n1) { 00208 Fallback::square (dest, s1, n1); 00209 } 00210 00211 }; // implementation<polynomial_multiply,V,polynomial_balanced_tft<W> > 00212 00213 #undef TMPL 00214 } // namespace mmx 00215 #endif //__MMX__POLYNOMIAL_TFT__HPP