html/doxygen/matrix__modular__int_8hpp_source.html

00001
00002 /******************************************************************************
00003 * MODULE     : matrix_modular_int.hpp
00004 * DESCRIPTION: modular matrices over hardware integers
00005 * COPYRIGHT  : (C) 2009  Joris van der Hoeven and Gregoire Lecerf
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012
00013 #ifndef __MMX__MATRIX_MODULAR_INT__HPP
00014 #define __MMX__MATRIX_MODULAR_INT__HPP
00015 #include <numerix/modular_int.hpp>
00016 #include <algebramix/matrix_int.hpp>
00017 #include <algebramix/matrix_double.hpp>
00018 #include <algebramix/matrix_modular.hpp>
00019
00020 namespace mmx {
00021 #define TMPL template<typename C, typename V1, typename V2>
00022 #define Modulus modulus<C,V1>
00023 #define Modular modular<Modulus,V2>
00024
00025 /******************************************************************************
00026 * Variant
00027 ******************************************************************************/
00028
00029 typedef matrix_unrolled<4,matrix_unrolled<4,matrix_naive> >
00030         matrix_unrolled_4_4;
00031
00032 DEFINE_VARIANT (matrix_modular_int,
00033                 matrix_strassen<
00034                   matrix_threads<
00035                     matrix_unrolled_4_4> >)
00036
00037 #define DECLARE_HELPER(I,V1)                                            \
00038   template<typename V2, nat m>                                          \
00039   struct matrix_variant_helper<modular<modulus<I,V1>,V2> > {            \
00040     typedef matrix_naive Naive;                                         \
00041     typedef matrix_unrolled<4,matrix_unrolled<2,Naive> > Unrolled;      \
00042     typedef typename Matrix_simd_variant(I) Simd;                       \
00043     typedef matrix_modular_int MV;                                      \
00044   };
00045
00046 DECLARE_HELPER(signed char, modulus_int_naive<m>)
00047 DECLARE_HELPER(signed char, modulus_int_preinverse<m>)
00048 DECLARE_HELPER(unsigned char, modulus_int_naive<m>)
00049 DECLARE_HELPER(unsigned char, modulus_int_preinverse<m>)
00050 DECLARE_HELPER(short int, modulus_int_naive<m>)
00051 DECLARE_HELPER(short int, modulus_int_preinverse<m>)
00052 DECLARE_HELPER(unsigned short int, modulus_int_naive<m>)
00053 DECLARE_HELPER(unsigned short int, modulus_int_preinverse<m>)
00054 DECLARE_HELPER(int, modulus_int_naive<m>)
00055 DECLARE_HELPER(int, modulus_int_preinverse<m>)
00056 DECLARE_HELPER(unsigned int, modulus_int_naive<m>)
00057 DECLARE_HELPER(unsigned int, modulus_int_preinverse<m>)
00058 DECLARE_HELPER(long int, modulus_int_naive<m>)
00059 DECLARE_HELPER(long int, modulus_int_preinverse<m>)
00060 DECLARE_HELPER(unsigned long int, modulus_int_naive<m>)
00061 DECLARE_HELPER(unsigned long int, modulus_int_preinverse<m>)
00062 DECLARE_HELPER(long long int, modulus_int_naive<m>)
00063 DECLARE_HELPER(long long int, modulus_int_preinverse<m>)
00064 DECLARE_HELPER(unsigned long long int, modulus_int_naive<m>)
00065 DECLARE_HELPER(unsigned long long int, modulus_int_preinverse<m>)
00066 #undef DECLARE_HELPER
00067
00068 /******************************************************************************
00069 * Encoding and decoding from/to lifting type
00070 ******************************************************************************/
00071
00072 template<typename D>
00073 struct free_bits_helper {
00074   static const nat value= 8 * sizeof (D);
00075 };
00076
00077 template<typename V, typename D, typename C, typename V1, typename V2> void
00078 encode_modular_int (D* dest, const Modular* src,
00079                     nat r, nat rr, nat c, nat cc)
00080 {
00081   typedef implementation<matrix_linear,V> Mat;
00082   nat dest_rs= Mat::index (1, 0, r, c);
00083   nat dest_cs= Mat::index (0, 1, r, c);
00084   nat src_rs = Mat::index (1, 0, rr, cc);
00085   nat src_cs = Mat::index (0, 1, rr, cc);
00086   D* dest_row= dest;
00087   const Modular* src_row= src;
00088   for (nat i=0; i<r; i++, dest_row += dest_rs, src_row += src_rs) {
00089     D* dest_col= dest_row;
00090     const Modular* src_col= src_row;
00091     for (nat j=0; j<c; j++, dest_col += dest_cs, src_col += src_cs)
00092       *dest_col= (D) (*(*src_col));
00093   }
00094 }
00095
00096 template<typename Op, typename V, typename C, typename D,
00097           typename V1, typename V2> void
00098 decode_modular_int (Modular* dest, const D* src,
00099                     nat r, nat rr, nat c, nat cc)
00100 {
00101   typedef implementation<matrix_linear,V> Mat;
00102   typedef typename Op::nomul_op Set;
00103   C p = *Modular::get_modulus ();
00104   nat dest_rs= Mat::index (1, 0, rr, cc);
00105   nat dest_cs= Mat::index (0, 1, rr, cc);
00106   nat src_rs = Mat::index (1, 0, r, c);
00107   nat src_cs = Mat::index (0, 1, r, c);
00108   Modular* dest_row= dest;
00109   const D* src_row= src;
00110   for (nat i=0; i<r; i++, dest_row += dest_rs, src_row += src_rs) {
00111     Modular* dest_col= dest_row;
00112     const D* src_col= src_row;
00113     for (nat j=0; j<c; j++, dest_col += dest_cs, src_col += src_cs)
00114       Set::set_op (*dest_col, Modular ((*src_col) % p, true));
00115   }
00116 }
00117
00118 #if defined(NUMERIX_ENABLE_SIMD)\
00119  && defined(ALGEBRAMIX_ENABLE_SIMD)\
00120  && defined(__SSE2__)
00121 // double are faster than int's on usual platforms supporting SSE2
00122 template<typename Op, typename V,
00123          typename C, typename V1, typename V2> void
00124 decode_modular_int (Modular* dest, const double* src,
00125                     nat r, nat rr, nat c, nat cc)
00126 {
00127   typedef implementation<matrix_linear,V> Mat;
00128   typedef typename Op::nomul_op Set;
00129   uint32_t p = (uint32_t) *Modular::get_modulus ();
00130   nat dest_rs= Mat::index (1, 0, rr, cc);
00131   nat dest_cs= Mat::index (0, 1, rr, cc);
00132   nat src_rs = Mat::index (1, 0, r, c);
00133   nat src_cs = Mat::index (0, 1, r, c);
00134   Modular* dest_row= dest;
00135   const double* src_row= src;
00136   for (nat i=0; i<r; i++, dest_row += dest_rs, src_row += src_rs) {
00137     Modular* dest_col= dest_row;
00138     const double* src_col= src_row;
00139     for (nat j=0; j<c; j++, dest_col += dest_cs, src_col += src_cs)
00140       Set::set_op (*dest_col, Modular (((uint64_t) (*src_col)) % p, true));
00141   }
00142 }
00143 STMPL
00144 struct free_bits_helper<double> {
00145   static const nat value= 53;
00146 };
00147 #endif // __SSE2__
00148
00149
00150 template<typename V>
00151 struct implementation<matrix_multiply_base,V,matrix_modular_int>:
00152   public implementation<matrix_linear,V>
00153 {
00154   typedef matrix_strassen<matrix_threads<matrix_unrolled_4_4> > W;
00155   typedef implementation<matrix_multiply,W> Fallback;
00156   typedef implementation<matrix_linear,W> Mat;
00157 private:
00158
00159 #if defined(NUMERIX_ENABLE_SIMD)\
00160  && defined(ALGEBRAMIX_ENABLE_SIMD)\
00161  && defined(__SSE2__)
00162 template<nat s>
00163 struct lifting_int_with_size_at_least_helper {
00164   template<bool b, typename T, typename E>
00165   struct if_helper { typedef T ret; };
00166   template<typename T, typename E>
00167   struct if_helper<false,T,E> { typedef E ret; };
00168   typedef
00169     typename if_helper<8*sizeof(short unsigned int) >= s, short unsigned int,
00170     typename if_helper<53 >= s, double,
00171       long long unsigned int>::ret >::ret type;
00172 };
00173
00174 #else
00175 template<nat s>
00176 struct lifting_int_with_size_at_least_helper {
00177   template<bool b, typename T, typename E>
00178   struct if_helper { typedef T ret; };
00179   template<typename T, typename E>
00180   struct if_helper<false,T,E> { typedef E ret; };
00181   typedef
00182     typename if_helper<8*sizeof(unsigned char)>=s, unsigned char,
00183     typename if_helper<8*sizeof(short unsigned int)>=s, short unsigned int,
00184     typename if_helper<8*sizeof(unsigned int)>=s, unsigned int,
00185     typename if_helper<8*sizeof(long unsigned int)>=s, long unsigned int,
00186     long long unsigned int>::ret >::ret >::ret >::ret type ;
00187 };
00188 #endif
00189
00190 template<typename Op, typename C, typename V1, typename V2>
00191 static inline void
00192 _mul_int (Modular* d, const Modular* s1, const Modular* s2,
00193          nat r, nat rr, nat l, nat ll, nat c, nat cc)
00194   {
00195     static const nat modulus_size= V1::template maximum_size_helper<C>::value;
00196     // 2 free bits are required
00197     static const nat required_size= (modulus_size << 1) + 2;
00198     typedef typename
00199       lifting_int_with_size_at_least_helper<required_size>::type D;
00200     // free bits in type D
00201     static const nat possible_free_bits=
00202       free_bits_helper<D>::value - (modulus_size << 1);
00203     static const nat free_bits= possible_free_bits >= 16
00204                                   ? 16 : possible_free_bits;
00205     const nat ul= MMX_SAFE_LEFT_SHIFT_INT(nat,1,free_bits);
00206     typedef typename Matrix_variant(D) W;
00207     typedef implementation<matrix_multiply,W> Mat_D;
00208
00209     if (free_bits == 0) {
00210       Fallback::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00211     }
00212     else if (l <= ul) {
00213       nat len_1= aligned_size<D,W> (r * l);
00214       nat len_2= aligned_size<D,W> (l * c);
00215       nat len_d= aligned_size<D,W> (r * c);
00216       nat spc = len_1 + len_2 + len_d;
00217       D* x1= mmx_new<D> (spc);
00218       D* x2= x1 + len_1;
00219       D* xd= x2 + len_2;
00220       encode_modular_int<W> (x1, s1, r, rr, l, ll);
00221       encode_modular_int<W> (x2, s2, l, ll, c, cc);
00222       Mat_D::template mul<Op> (xd, x1, x2, r, r, l, l, c, c);
00223       decode_modular_int<Op,W> (d, xd, r, rr, c, cc);
00224       mmx_delete<D> (x1, spc);
00225     }
00226     else {
00227       typedef typename Op::acc_op Acc;
00228       const nat ur= 32;
00229       const nat uc= 32;
00230
00231       nat nr= (r + ur - 1) / ur;
00232       nat nl= (l + ul - 1) / ul;
00233       nat nc= (c + uc - 1) / uc;
00234       for (nat ir=0; ir<nr; ir++)
00235         for (nat ic=0; ic<nc; ic++) {
00236           nat il=0;
00237           for (; il<1; il++) {
00238             mul<Op > (d  + Mat::index (ir*ur, ic*uc, rr, cc),
00239                       s1 + Mat::index (ir*ur, il*ul, rr, ll),
00240                       s2 + Mat::index (il*ul, ic*uc, ll, cc),
00241                       min (ur, r - ir*ur), rr,
00242                       min (ul, l - il*ul), ll,
00243                       min (uc, c - ic*uc), cc);
00244           }
00245           for (; il<nl; il++) {
00246             mul<Acc> (d  + Mat::index (ir*ur, ic*uc, rr, cc),
00247                       s1 + Mat::index (ir*ur, il*ul, rr, ll),
00248                       s2 + Mat::index (il*ul, ic*uc, ll, cc),
00249                       min (ur, r - ir*ur), rr,
00250                       min (ul, l - il*ul), ll,
00251                       min (uc, c - ic*uc), cc);
00252           }
00253         }
00254     }
00255   }
00256
00257 public:
00258
00259 template<typename Op, typename C, typename V1, typename V2>
00260 static inline void
00261 mul (Modular* d, const Modular* s1, const Modular* s2,
00262      nat r, nat rr, nat l, nat ll, nat c, nat cc)
00263   {
00264     _mul_int<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00265   }
00266
00267 }; // implementation<matrix_multiply_base,V,matrix_modular_int<MV> >
00268
00269 #undef TMPL
00270 #undef Modulus
00271 #undef Modular
00272 } // namespace mmx
00273
00274 #endif //__MMX__MATRIX_MODULAR_INT__HPP