html/doxygen/fkt__transform_8hpp_source.html

00001
00002 /******************************************************************************
00003 * MODULE     : fkt_transform.hpp
00004 * DESCRIPTION: fast Karatsuba transform
00005 * COPYRIGHT  : (C) 2004  Joris van der Hoeven
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012
00013 #ifndef __MMX__FKT_TRANSFORM__HPP
00014 #define __MMX__FKT_TRANSFORM__HPP
00015 #include <algebramix/polynomial_naive.hpp>
00016
00017 namespace mmx {
00018 #define TMPL template<class C>
00019
00020 template<typename V>
00021 struct fkt_package {
00022   typedef implementation<polynomial_multiply,V> Pol;
00023   typedef typename Pol::Vec Vec;
00024
00025 TMPL static void
00026 direct_fkt_step (C* dest, nat n) {
00027   // dest of length n on entry and 3n/2 on exit
00028   nat h= n >> 1;
00029   Vec::half_copy (dest + n, dest + 1, h);
00030   Vec::half_copy (dest + 1, dest + 2, h - 1);
00031   Pol::add (dest + h, dest, dest + n, h);
00032 }
00033
00034 TMPL static void
00035 direct_fkt (C* dest, nat n2, nat n3) {
00036   // dest of length n2 = 2^p on entry and n3 = 3^p on exit
00037   if (n2 == 2) {
00038     dest[2]= dest[1];
00039     dest[1] += dest[0];
00040   }
00041   else if (n2 == 4) {
00042     C p= dest[0] + dest[1];
00043     C q= dest[2] + dest[3];
00044     dest[6]= dest[2];
00045     dest[2]= dest[1];
00046     dest[8]= dest[3];
00047     dest[1]= p;
00048     dest[3]= dest[0] + dest[6];
00049     dest[4]= p+q;
00050     dest[5]= dest[2] + dest[8];
00051     dest[7]= q;
00052   }
00053   else while (n2 != n3) {
00054     direct_fkt_step (dest, n2);
00055     n2= n2 + (n2 >> 1);
00056   }
00057 }
00058
00059 TMPL static void
00060 inverse_fkt_step (C* dest, nat n, nat l) {
00061   // dest of length n = 3 k l on entry for some integer k
00062   // dest of length (2 l + 1) k on exit
00063   nat t= n/3, k= t/l, ll= (l << 1) + 1;
00064   Pol::sub (dest + t, dest, t);
00065   Pol::sub (dest + t, dest + (t << 1), t);
00066   nat buf_size= aligned_size<C,V> (n);
00067   C* buf= mmx_new<C> (buf_size);
00068   Pol::copy (buf, dest, n);
00069   C* src= buf;
00070   if (l == 1) {
00071     Vec::triple_copy (dest, buf, t);
00072     Vec::triple_copy (dest + 1, buf + t, t);
00073     Vec::triple_copy (dest + 2, buf + (t << 1), t);
00074   }
00075   else while (k != 0) {
00076     Vec::double_copy (dest, src, l);
00077     Vec::double_copy (dest + 1, src + t, l);
00078     Vec::double_add (dest + 2, src + (t << 1), l-1);
00079     dest[l<<1]= src[(t<<1) + (l-1)];
00080     dest += ll; src += l; k--;
00081   }
00082   mmx_delete<C> (buf, buf_size);
00083 }
00084
00085 TMPL static void
00086 inverse_fkt (C* dest, nat n3, nat n2) {
00087   // dest of length n3 = 3^p on entry and 2 n2 - 1 on exit with n2 = 2 ^ p
00088   if (n2 == 2)
00089     dest[1] -= (dest[0] + dest[2]);
00090   else if (n2 == 4) {
00091     dest[3] -= (dest[0] + dest[6]);
00092     dest[4] -= (dest[1] + dest[7]);
00093     dest[5] -= (dest[2] + dest[8]);
00094     C aux= dest[1]; dest[1]= dest[3]; dest[3]= aux;
00095     aux= dest[2]; dest[2]= dest[6]; dest[6]= aux;
00096     aux= dest[5]; dest[5]= dest[7]; dest[7]= aux;
00097     dest[3] -= (dest[0] + dest[6]);
00098     dest[4] -= (dest[1] + dest[7]);
00099     dest[5] -= (dest[2] + dest[8]);
00100     aux= dest[1];
00101     dest[1]= dest[3];
00102     dest[3]= dest[4];
00103     dest[4]= dest[2] + dest[7];
00104     dest[2]= aux + dest[6];
00105     dest[6]= dest[8];
00106   }
00107   else {
00108     nat n= n3, l= 1;
00109     while (n != l) {
00110       inverse_fkt_step (dest, n, l);
00111       nat k= n / ((l << 1) + l);
00112       l= (l << 1) + 1;
00113       n= k * l;
00114     }
00115   }
00116 }
00117
00118 };
00119
00120 #undef TMPL
00121 } // namespace mmx
00122 #endif //__MMX__FKT_TRANSFORM__HPP