html/doxygen/polynomial__tft_8hpp_source.html

00001
00002 /******************************************************************************
00003 * MODULE     : polynomial_tft.hpp
00004 * DESCRIPTION: FFT based algorithms
00005 * COPYRIGHT  : (C) 2008  Gregoire Lecerf
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012
00013 #ifndef __MMX__POLYNOMIAL_TFT__HPP
00014 #define __MMX__POLYNOMIAL_TFT__HPP
00015 #include <algebramix/polynomial_dicho.hpp>
00016 #include <algebramix/polynomial_ring_dicho.hpp>
00017 #include <algebramix/fft_blocks.hpp>
00018 #include <algebramix/fft_simd.hpp>
00019 #include <algebramix/fft_truncated.hpp>
00020
00021 namespace mmx {
00022 #define TMPL template<typename C>
00023
00024 /******************************************************************************
00025 * TFT based variant polynomials
00026 ******************************************************************************/
00027
00028 struct tft_threshold {};
00029 template<typename V> struct polynomial_balanced_tft;
00030
00031 template<typename C>
00032 struct threshold_helper<C,tft_threshold> {
00033   typedef fixed_value<nat,128> impl;
00034 };
00035
00036 template<typename C, typename V>
00037 struct threshold_helper<polynomial<C,V>,tft_threshold> {
00038   typedef fixed_value<nat,32> impl;
00039 };
00040
00041 template<typename C, typename U, typename V, typename W>
00042 struct threshold_helper<modular<modulus<polynomial<C,U>,V>,W>,
00043                         tft_threshold> {
00044 typedef fixed_value<nat,32> impl;
00045 };
00046
00047 template<typename V, typename T= tft_threshold>
00048 struct polynomial_tft_inc: public V {
00049   typedef typename V::Vec Vec;
00050   typedef typename V::Naive Naive;
00051   typedef typename V::Positive Positive;
00052   typedef polynomial_tft_inc<typename V::No_simd,T> No_simd;
00053   typedef polynomial_tft_inc<typename V::No_thread,T> No_thread;
00054   typedef polynomial_tft_inc<typename V::No_scaled,T> No_scaled;
00055 };
00056
00057 template<typename F, typename V, typename W, typename Th>
00058 struct implementation<F,V,polynomial_tft_inc<W,Th> >:
00059   public implementation<F,V,W> {};
00060
00061 DEFINE_VARIANT_1 (typename V, V, polynomial_tft,
00062                   polynomial_dicho<
00063                    polynomial_balanced_tft<
00064                      polynomial_tft_inc<
00065                        polynomial_karatsuba<V> > > >)
00066
00067 STMPL
00068 struct polynomial_variant_helper<modular<modulus<nat>,
00069                                          modular_fixed<nat,0x0c0000001> > > {
00070   typedef polynomial_tft<polynomial_naive> PV;
00071 };
00072
00073 template<typename C, typename V>
00074 struct polynomial_variant_helper<polynomial<C,polynomial_tft<V> > > {
00075   typedef polynomial_gcd_ring_dicho<polynomial_tft<V> > PV;
00076 };
00077
00078 /******************************************************************************
00079 * Multiplication
00080 ******************************************************************************/
00081
00082 template<typename V, typename W, typename Th>
00083 struct implementation<polynomial_multiply,V,polynomial_tft_inc<W,Th> >:
00084   public implementation<polynomial_linear,V>
00085 {
00086   typedef implementation<vector_linear,V> Vec;
00087   typedef implementation<polynomial_linear,V> Pol;
00088   typedef implementation<polynomial_multiply,W> Fallback;
00089
00090 TMPL static inline void
00091 mul (C* dest, const C* s1, const C* s2, nat n1, nat n2) {
00092   typedef fft_blocks_transformer<C, fft_simd_transformer<C>,
00093     8, 5, 10, 16> FFTer;
00094   typedef fft_truncated_transformer<C,FFTer> Tfter;
00095   //typedef fft_truncated_transformer<C> Tfter;
00096
00097   if (n1 < Threshold(C,Th) || n2 < Threshold(C,Th))
00098     Fallback::mul (dest, s1, s2, n1, n2);
00099   else {
00100     nat n= n1 + n2 - 1;
00101     nat m= next_power_of_two (n);
00102     nat l= aligned_size<C,V> (m);
00103     C* temp0= mmx_new<C> (l);
00104     C* temp1= mmx_new<C> (l);
00105     C* tempd= mmx_new<C> (l);
00106     Tfter tfter (n);
00107     Vec::copy (temp0, s1, n1);
00108     Vec::clear (temp0+n1, n-n1);
00109     Vec::copy (temp1, s2, n2);
00110     Vec::clear (temp1+n2, n-n2);
00111     tfter.direct_transform (temp0);
00112     tfter.direct_transform (temp1);
00113     Vec::mul (tempd, temp0, temp1, n);
00114     tfter.inverse_transform (tempd);
00115     Vec::copy (dest, tempd, n);
00116     mmx_delete<C> (temp0, l);
00117     mmx_delete<C> (temp1, l);
00118     mmx_delete<C> (tempd, l);
00119   }
00120 }
00121
00122 TMPL static inline void
00123 square (C* dest, const C* s1, nat n1) {
00124   typedef fft_truncated_transformer<C> Tfter;
00125   if (n1 < Threshold(C,Th))
00126     Fallback::square (dest, s1, n1);
00127   else {
00128     nat n= 2 * n1 - 1;
00129     nat m= next_power_of_two (n);
00130     nat l= aligned_size<C,V> (m);
00131     C* temp0= mmx_new<C> (l);
00132     C* tempd= mmx_new<C> (l);
00133     Tfter tfter (n);
00134     Vec::copy (temp0, s1, n1);
00135     Vec::clear (temp0+n1, n-n1);
00136     tfter.direct_transform (temp0);
00137     Vec::mul (tempd, temp0, temp0, n);
00138     tfter.inverse_transform (tempd);
00139     Vec::copy (dest, tempd, n);
00140     mmx_delete<C> (temp0, l);
00141     mmx_delete<C> (tempd, l);
00142   }
00143 }
00144
00145 }; // implementation<polynomial_multiply,V,polynomial_tft_inc<W,Th> >
00146
00147 /******************************************************************************
00148 * Degree balancing
00149 ******************************************************************************/
00150
00151 template<typename V>
00152 struct polynomial_balanced_tft: public V {
00153   typedef typename V::Vec Vec;
00154   typedef typename V::Naive Naive;
00155   typedef polynomial_balanced_tft<typename V::Positive> Positive;
00156   typedef polynomial_balanced_tft<typename V::No_simd> No_simd;
00157   typedef polynomial_balanced_tft<typename V::No_thread> No_thread;
00158   typedef polynomial_balanced_tft<typename V::No_scaled> No_scaled;
00159 };
00160
00161 template<typename F, typename V, typename W>
00162 struct implementation<F,V,polynomial_balanced_tft<W> >:
00163   public implementation<F,V,W> {};
00164
00165 template<typename V, typename W>
00166 struct implementation<polynomial_multiply,V,polynomial_balanced_tft<W> >:
00167   public implementation<polynomial_linear,V>
00168 {
00169   typedef implementation<vector_linear,V> Vec;
00170   typedef implementation<polynomial_linear,W> Pol;
00171   typedef implementation<polynomial_multiply,W> Fallback;
00172
00173 TMPL static inline void
00174 mul (C* dest, const C* s1, const C* s2, nat n1, nat n2) {
00175   if (n1 == 0 && n2 == 0)
00176     return;
00177   if (n1 == 0 || n2 == 0) {
00178     Pol::clear (dest, n1+n2-1);
00179     return;
00180   }
00181   if (n2 < n1) {
00182     mul (dest, s2, s1, n2, n1);
00183     return;
00184   }
00185   nat n= n1+n2-1;
00186   nat m= next_power_of_two (2*n1 - 1);
00187   if (n1 <= n2 && n >= m) {
00188     Pol::clear (dest, n);
00189     nat l= aligned_size<C,V> (m);
00190     C* temp= mmx_new<C> (l);
00191     while (n1+n2-1 >= m) {
00192       Fallback::mul (temp, s1, s2, n1, m-n1+1);
00193       Vec::add (dest, temp, m);
00194       n2 -= m-n1+1; s2 += m-n1+1; dest += m-n1+1;
00195     }
00196     if (n2 > 0) {
00197       mul (temp, s1, s2, n1, n2);
00198       Vec::add (dest, temp, n1+n2-1);
00199     }
00200     mmx_delete<C> (temp, l);
00201     return;
00202   }
00203   Fallback::mul (dest, s1, s2, n1, n2);
00204 }
00205
00206 TMPL static inline void
00207 square (C* dest, const C* s1, nat n1) {
00208   Fallback::square (dest, s1, n1);
00209 }
00210
00211 }; // implementation<polynomial_multiply,V,polynomial_balanced_tft<W> >
00212
00213 #undef TMPL
00214 } // namespace mmx
00215 #endif //__MMX__POLYNOMIAL_TFT__HPP