html/doxygen/matrix__aligned_8hpp_source.html

00001
00002 /******************************************************************************
00003 * MODULE     : matrix_aligned.hpp
00004 * DESCRIPTION: matrices aligned in memory
00005 * COPYRIGHT  : (C) 2009  Joris van der Hoeven and Gregoire Lecerf
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012
00013 #ifndef __MMX__MATRIX_ALIGNED__HPP
00014 #define __MMX__MATRIX_ALIGNED__HPP
00015 #include <basix/int.hpp>
00016 #include <numerix/simd.hpp>
00017 #include <algebramix/vector_aligned.hpp>
00018
00019 namespace mmx {
00020
00021 /******************************************************************************
00022 *  Variants for matrices aligned in memory
00023 ******************************************************************************/
00024
00025 template<typename V, typename W>
00026 struct matrix_assume_aligned: public V {
        // Matrix variant tag; it derives from V and overrides the typedefs below.
00027   // All the columns of these matrices are assumed to be aligned
00028   // V is the variant to be used on base types
00029   // W is the variant to be used on simd types (see matrix_sse.hpp)
00030
        // Vector variant built from the vector variants of V and W
00031   typedef vector_assume_aligned<typename V::Vec,
00032                                 typename W::Vec> Vec;
        // The derived variants of V and W are wrapped again, so that they
        // remain matrix_assume_aligned variants
00033   typedef matrix_assume_aligned<typename V::Naive,
00034                                 typename W::Naive> Naive;
00035   typedef matrix_assume_aligned<typename V::Positive,
00036                                 typename W::Positive> Positive;
        // Exception: No_aligned is V's own No_aligned, without this wrapper
        // and without W
00037   typedef typename V::No_aligned No_aligned;
00038   typedef matrix_assume_aligned<typename V::No_thread,
00039                                 typename W::No_thread> No_thread;
00040   typedef matrix_assume_aligned<typename V::No_scaled,
00041                                 typename W::No_scaled> No_scaled;
00042 };
00043
00044 template<typename F, typename Z, typename V, typename W>
      // Default rule: a feature F that has no more specialized implementation
      // for matrix_assume_aligned<V,W> is inherited from the variant V.
00045 struct implementation<F,Z,matrix_assume_aligned<V,W> >:
00046     public implementation<F,Z,V> {};
00047
00048 template<typename Z, typename V, typename W>
      // vector_allocate is inherited from the implementation selected by the
      // associated vector variant Vec, not from V.
      // NOTE(review): the base is named with two arguments only (no Z);
      // presumably the last parameter of implementation has a default -- confirm.
00049 struct implementation<vector_allocate,Z,matrix_assume_aligned<V,W> >:
00050   public implementation<vector_allocate,
00051                         typename matrix_assume_aligned<V,W>::Vec> {};
00052
00053 template<typename V, typename W>
00054 struct matrix_aligned: public V {
        // Matrix variant tag; it derives from V and overrides the typedefs below.
00055   // These matrices are not necessarily aligned in memory
        // Vector variant built from the vector variants of V and W
00056   typedef vector_aligned<typename V::Vec,
00057                          typename W::Vec> Vec;
        // The derived variants of V and W are wrapped again, so that they
        // remain matrix_aligned variants
00058   typedef matrix_aligned<typename V::Naive,
00059                          typename W::Naive> Naive;
00060   typedef matrix_aligned<typename V::Positive,
00061                          typename W::Positive> Positive;
        // Exception: No_aligned is V's own No_aligned, without this wrapper
        // and without W
00062   typedef typename V::No_aligned No_aligned;
00063   typedef matrix_aligned<typename V::No_thread,
00064                          typename W::No_thread> No_thread;
00065   typedef matrix_aligned<typename V::No_scaled,
00066                          typename W::No_scaled> No_scaled;
00067 };
00068
00069 template<typename F, typename Z, typename V, typename W>
      // Default rule: a feature F that has no more specialized implementation
      // for matrix_aligned<V,W> is inherited from the variant V.
00070 struct implementation<F,Z,matrix_aligned<V,W> >:
00071     public implementation<F,Z,V> {};
00072
00073 template<typename Z, typename V, typename W>
      // vector_allocate is inherited from the implementation selected by the
      // associated vector variant Vec, not from V.
      // NOTE(review): the base is named with two arguments only (no Z);
      // presumably the last parameter of implementation has a default -- confirm.
00074 struct implementation<vector_allocate,Z,matrix_aligned<V,W> >:
00075   public implementation<vector_allocate,
00076                         typename matrix_aligned<V,W>::Vec> {};
00077
00078 /******************************************************************************
00079 * Helpers to be specialized for aligned matrices over hardware data types
00080 ******************************************************************************/
00081
00082 template<typename V, typename W,
00083          typename Op, typename D, typename S1, typename S2>
00084 struct mat_mul_aligned_helper {
        // Generic case: no dedicated aligned kernel, so the product is delegated
        // to the matrix_multiply implementation of the variant V.
        // W is not used here.
00085   static inline void
00086   mul (D* d, const S1* s1, const S2* s2,
00087        nat r, nat rr, nat l, nat ll, nat c, nat cc)
00088   {
00089     implementation<matrix_multiply,V>::
00090       template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00091   }
00092 };
00093
00094 /******************************************************************************
00095 * Aligned matrix multiplication
00096 ******************************************************************************/
00097
00098 template<typename Z, typename V, typename W>
00099 struct implementation<matrix_multiply_base,Z,matrix_assume_aligned<V,W> >:
00100   public implementation<matrix_linear,Z>
00101 {
        // Product of matrices assumed to be aligned: all the work is handed to
        // mat_mul_aligned_helper, selected by V, W, Op and the three entry types.
00102   template<typename Op, typename D, typename S1, typename S2>
00103   static inline void
00104   mul (D* d, const S1* s1, const S2* s2,
00105        nat r, nat rr, nat l, nat ll, nat c, nat cc)
00106   {
00107     typedef mat_mul_aligned_helper<V,W,Op,D,S1,S2> Helper;
00108     Helper::mul (d, s1, s2, r, rr, l, ll, c, cc);
00109   }
00110 }; // implementation<matrix_multiply_base,Z,matrix_assume_aligned<V,W> >
00111
00112 /******************************************************************************
00113 * Matrix multiplication, not necessarily aligned
00114 ******************************************************************************/
00115
00116 template<typename Z, typename V, typename W>
00117 struct implementation<matrix_multiply_base,Z,matrix_aligned<V,W> >:
00118   public implementation<matrix_linear,Z>
00119 {
        // Product of matrices that are not necessarily aligned.  The work is
        // dispatched between
        //   Mat  -- the plain product of the variant V, and
        //   AMat -- the product for matrices assumed to be aligned,
        // copying the operands into fresh buffers when needed.
        // Vec provides the alignment queries used for that decision.
00120   typedef implementation<matrix_multiply,V> Mat;
00121   typedef implementation<matrix_multiply,matrix_assume_aligned<V,W> > AMat;
00122   typedef implementation<vector_allocate,
00123                          typename matrix_aligned<V,W>::Vec> Vec;
00124
        // d receives the product of s1 by s2, combined through Op.
        // The temporaries below are sized r*c for d, r*l for s1 and l*c for s2.
        // rr, ll, cc are the dimensions given to Mat::index when addressing
        // the caller's buffers (presumably the allocated sizes -- confirm).
00125   template<typename Op, typename D, typename S1, typename S2>
00126   static inline void
00127   mul (D* d, const S1* s1, const S2* s2,
00128        nat r, nat rr, nat l, nat ll, nat c, nat cc)
00129   {
00130     if (r == 0) return;
          // The aligned code is only tried when mask_helper reports len == 16
          // for both D and S1; otherwise use the plain product of V.
00131     if (mask_helper<D>::len != 16 || mask_helper<S1>::len != 16) {
00132       Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00133       return;
00134     }
          // ar: part of the r rows covered by vec_floor_aligned_size,
          // sr: remaining rows; arr: the same computation applied to rr.
00135     nat ar= Vec::template vec_floor_aligned_size<D> (r);
00136     nat sr= r - ar;
00137     nat arr= Vec::template vec_floor_aligned_size<D> (rr);
00138     if (sr != 0) {
            // First ar rows: recursive call (there sr is expected to vanish);
            // last sr rows: plain product, starting at row ar of d and s1.
00139       if (ar != 0) mul<Op> (d, s1, s2, ar, rr, l, ll, c, cc);
00140       Mat::template mul<Op> (d  + Mat::index (ar, 0, rr, cc),
00141                              s1 + Mat::index (ar, 0, rr, ll), s2,
00142                              sr, rr, l, ll, c, cc);
00143       // NOTE: row times matrix can be further optimized for SIMD instructions
00144     }
00145     else if (! Vec::template vec_is_aligned (d) ||
00146              ! Vec::template vec_is_aligned (s1) ||
00147              arr != rr) {
            // d or s1 is not aligned, or rr differs from arr: work on compact
            // copies (dimensions r, l, c), then merge the result into d.
            // NOTE(review): xd is freshly allocated and then used as the
            // destination of Op; if Op reads its destination, the result
            // depends on how mmx_new initializes the entries -- confirm.
00148       D* xd = mmx_new<D> (r * c);
00149       S1* xs1= mmx_new<S1> (r * l);
00150       S2* xs2= mmx_new<S2> (l * c);
00151       Mat::template mat_unary_stride<id_op>
00152         (xs1, Mat::index (1, 0, r , l ), Mat::index (0, 1, r , l ),
00153          s1 , Mat::index (1, 0, rr, ll), Mat::index (0, 1, rr, ll), r, l);
00154       Mat::template mat_unary_stride<id_op>
00155         (xs2, Mat::index (1, 0, l , c ), Mat::index (0, 1, l , c ),
00156          s2 , Mat::index (1, 0, ll, cc), Mat::index (0, 1, ll, cc), l, c);
            // The copies are compact, hence the dimensions (r, r, l, l, c, c)
00157       AMat::template mul<Op> (xd, xs1, xs2, r, r, l, l, c, c);
00158       Mat::template mat_unary_stride<typename Op::nomul_op>
00159         (d , Mat::index (1, 0, rr, cc), Mat::index (0, 1, rr, cc),
00160          xd, Mat::index (1, 0, r , c ), Mat::index (0, 1, r , c ), r, c);
00161       mmx_delete<D> (xd, r * c);
00162       mmx_delete<S1> (xs1, r * l);
00163       mmx_delete<S2> (xs2, l * c);
00164     }
00165     else
            // Everything is aligned: multiply in place on the caller's buffers,
            // which are addressed with (rr, ll, cc) -- so cc, not c, is passed.
00166       AMat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00167   }
00168
00169 }; // implementation<matrix_multiply_base,Z,matrix_aligned<V,W> >
00170
00171 } // namespace mmx
00172 #endif // __MMX__MATRIX_ALIGNED__HPP