algebramix_doc 0.3
|
00001 00002 /****************************************************************************** 00003 * MODULE : fft_blocks.hpp 00004 * DESCRIPTION: Cache friendly FFT 00005 * COPYRIGHT : (C) 2008 Gregoire Lecerf 00006 ******************************************************************************* 00007 * This software falls under the GNU general public license and comes WITHOUT 00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00009 * If you don't have this file, write to the Free Software Foundation, Inc., 00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00011 ******************************************************************************/ 00012 00013 #ifndef __MMX__FFT_BLOCKS__HPP 00014 #define __MMX__FFT_BLOCKS__HPP 00015 #include <basix/vector.hpp> 00016 #include <basix/threads.hpp> 00017 #include <algebramix/fft_naive.hpp> 00018 00019 namespace mmx { 00020 00021 /****************************************************************************** 00022 * Multi-block FFT transformation 00023 ******************************************************************************/ 00024 00025 template<typename C, typename FFTER= fft_naive_transformer<C>, 00026 nat log2_outer_block_size= 9, 00027 nat log2_block_number= 7, // number of outer blocks 00028 // to be handled in sequence 00029 nat log2_inner_block_size= 11, 00030 nat threshold= 12> 00031 class fft_blocks_transformer { 00032 public: 00033 typedef implementation<vector_linear,vector_naive> NVec; 00034 typedef typename FFTER::R R; 00035 typedef typename R::U U; 00036 typedef typename R::S S; 00037 00038 FFTER* ffter; 00039 nat depth; 00040 nat len; 00041 U* roots; 00042 00043 public: 00044 inline fft_blocks_transformer (nat n): 00045 ffter (new FFTER (n)), 00046 depth (ffter->depth), len (ffter->len), roots (ffter->roots) {} 00047 00048 inline ~fft_blocks_transformer () { delete ffter; } 00049 00050 static inline void 00051 delocate (C* c1, nat s1, C* c0, nat s0, nat n) { 00052 for (nat i= 0; i < s1; i++, c0 += s0) { 00053 C* cc0= c0, * cc1= c1 + i; 00054 for (nat k= 0; k < n; k++, cc0++, cc1+= s1) 00055 *cc1= *cc0; } } 00056 00057 static inline void 00058 relocate (C* c1, nat s1, C* c0, nat s0, nat n) { 00059 for (nat i= 0; i < s1; i++, c0 += s0) { 00060 C* cc0= c0, * cc1= c1 + i; 00061 for (nat k= 0; k < n; k++, cc0++, cc1+= s1) 00062 *cc0= *cc1; } } 00063 00064 inline void 00065 dfft (C* c, nat stride, nat shift, nat steps, nat step1, nat step2) { 00066 if (steps <= max (threshold, log2_inner_block_size)) 00067 ffter->dfft (c, stride, shift, steps, step1, step2); 00068 else { 00069 nat temp_len= default_aligned_size<C> 00070 ((nat) 1<<(log2_outer_block_size+log2_block_number)); 00071 C* c0, * temp= mmx_new<C> (temp_len); 00072 nat step, block, blocks, todo, k_beg, k_end; 00073 00074 for (step= step1; 00075 step < min (steps - log2_inner_block_size, step2); 00076 step += block) { 00077 block = min (min (steps - log2_inner_block_size - step, step2 - step), 00078 log2_outer_block_size); 00079 todo = steps - 1 - step; 00080 blocks= todo - block + 1; 00081 c0= c; 00082 for (nat j= 0; j < ((nat) 1<<step); j++) { 00083 for (k_beg= 0; k_beg < ((nat) 1<<blocks); k_beg= k_end) { 00084 k_end= min (((nat) 1<<blocks), 00085 k_beg + ((nat) 1<< log2_block_number)); 00086 delocate (temp, (nat) 1<<block, 00087 c0 + k_beg, stride<<blocks, k_end - k_beg); 00088 for (nat k= 0; k < k_end - k_beg; k++) 00089 ffter->dfft (temp+(k<<block), 1, 00090 ((shift>>todo) + j)<<(block-1), block); 00091 relocate (temp, (nat) 1<<block, 00092 c0+k_beg, stride<<blocks, k_end - k_beg); 00093 } 00094 c0 += (stride<<(todo+1)); 00095 } 00096 } 00097 mmx_delete<C> (temp, temp_len); 00098 if (step < step2) { 00099 c0= c; 00100 todo= steps - 1 - step; 00101 for (nat j= 0; j < ((nat) 1<<step); j++) { 00102 ffter->dfft (c0, stride, ((shift>>todo) + j) << todo, 00103 todo+1, 0, step2-step); 00104 c0 += (stride<<(todo+1)); 00105 } 00106 } 00107 } 00108 } 00109 00110 inline void 00111 ifft (C* c, nat stride, nat shift, nat steps, nat step1, nat step2) { 00112 if (steps <= max (threshold, log2_inner_block_size)) 00113 ffter->ifft (c, stride, shift, steps, step1, step2); 00114 else { 00115 nat temp_len= default_aligned_size<C> 00116 ((nat) 1<<(log2_outer_block_size+log2_block_number)); 00117 C* c0, * temp= mmx_new<C> (temp_len); 00118 nat step, block, blocks, todo, k_beg, k_end; 00119 00120 if (log2_inner_block_size > steps - step2) { 00121 step= steps - log2_inner_block_size; 00122 c0= c; 00123 todo= steps - 1 - step; 00124 for (nat j= 0; j < ((nat) 1<<step); j++) { 00125 ffter->ifft (c0, stride, ((shift>>todo) + j) <<todo, 00126 todo+1, max(0, (int) step1- (int) step), step2-step); 00127 c0 += (stride<<(todo+1)); 00128 } 00129 } 00130 else 00131 step= step2; 00132 00133 while (step > step1) { 00134 block = min (step - step1, log2_outer_block_size); 00135 step -= block; 00136 todo = steps - 1 - step; 00137 blocks= todo - block + 1; 00138 c0= c; 00139 for (nat j= 0; j < ((nat) 1<<step); j++) { 00140 for (k_beg= 0; k_beg < ((nat) 1<<blocks); k_beg= k_end) { 00141 k_end= min (((nat) 1<<blocks), 00142 k_beg + ((nat) 1<< log2_block_number)); 00143 delocate (temp, (nat) 1<<block, 00144 c0 + k_beg, stride<<blocks, k_end - k_beg); 00145 for (nat k= 0; k < k_end - k_beg; k++) 00146 ffter->ifft (temp+(k<<block), 1, 00147 ((shift>>todo) + j)<<(block-1), block); 00148 relocate (temp, (nat) 1<<block, 00149 c0+k_beg, stride<<blocks, k_end - k_beg); 00150 } 00151 c0 += (stride<<(todo+1)); 00152 } 00153 } 00154 mmx_delete<C> (temp, temp_len); 00155 } 00156 } 00157 00158 inline void 00159 dfft (C* c, nat stride, nat shift, nat steps) { 00160 dfft (c, stride, shift, steps, 0, steps); } 00161 00162 inline void 00163 ifft (C* c, nat stride, nat shift, nat steps) { 00164 ifft (c, stride, shift, steps, 0, steps); } 00165 00166 inline void 00167 direct_transform (C* c) { 00168 dfft (c, 1, 0, depth); } 00169 00170 inline void 00171 inverse_transform (C* c, bool divide=true) { 00172 ifft (c, 1, 0, depth); 00173 if (divide) { 00174 S x= binpow (S (2), depth); 00175 x= invert (x); 00176 NVec::template vec_unary_scalar<typename R::fft_mul_sc_op> (c, x, len); 00177 } 00178 } 00179 }; 00180 00181 } // namespace mmx 00182 #endif //__MMX__FFT_BLOCKS__HPP