algebramix_doc 0.3
|
00001 00002 /****************************************************************************** 00003 * MODULE : matrix_aligned.hpp 00004 * DESCRIPTION: matrices aligned in memory 00005 * COPYRIGHT : (C) 2009 Joris van der Hoeven and Gregoire Lecerf 00006 ******************************************************************************* 00007 * This software falls under the GNU general public license and comes WITHOUT 00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00009 * If you don't have this file, write to the Free Software Foundation, Inc., 00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00011 ******************************************************************************/ 00012 00013 #ifndef __MMX__MATRIX_ALIGNED__HPP 00014 #define __MMX__MATRIX_ALIGNED__HPP 00015 #include <basix/int.hpp> 00016 #include <numerix/simd.hpp> 00017 #include <algebramix/vector_aligned.hpp> 00018 00019 namespace mmx { 00020 00021 /****************************************************************************** 00022 * Variants for matrices aligned in memory 00023 ******************************************************************************/ 00024 00025 template<typename V, typename W> 00026 struct matrix_assume_aligned: public V { 00027 // All the columns of these matrices are assumed to be aligned 00028 // V is the variant to be used on base types 00029 // W is the variant to be used on simd types (see matrix_sse.hpp) 00030 00031 typedef vector_assume_aligned<typename V::Vec, 00032 typename W::Vec> Vec; 00033 typedef matrix_assume_aligned<typename V::Naive, 00034 typename W::Naive> Naive; 00035 typedef matrix_assume_aligned<typename V::Positive, 00036 typename W::Positive> Positive; 00037 typedef typename V::No_aligned No_aligned; 00038 typedef matrix_assume_aligned<typename V::No_thread, 00039 typename W::No_thread> No_thread; 00040 typedef matrix_assume_aligned<typename V::No_scaled, 00041 typename W::No_scaled> No_scaled; 00042 }; 00043 00044 template<typename F, typename Z, typename V, typename W> 00045 struct implementation<F,Z,matrix_assume_aligned<V,W> >: 00046 public implementation<F,Z,V> {}; 00047 00048 template<typename Z, typename V, typename W> 00049 struct implementation<vector_allocate,Z,matrix_assume_aligned<V,W> >: 00050 public implementation<vector_allocate, 00051 typename matrix_assume_aligned<V,W>::Vec> {}; 00052 00053 template<typename V, typename W> 00054 struct matrix_aligned: public V { 00055 // These matrices are not necessarily aligned in memory 00056 typedef vector_aligned<typename V::Vec, 00057 typename W::Vec> Vec; 00058 typedef matrix_aligned<typename V::Naive, 00059 typename W::Naive> Naive; 00060 typedef matrix_aligned<typename V::Positive, 00061 typename W::Positive> Positive; 00062 typedef typename V::No_aligned No_aligned; 00063 typedef matrix_aligned<typename V::No_thread, 00064 typename W::No_thread> No_thread; 00065 typedef matrix_aligned<typename V::No_scaled, 00066 typename W::No_scaled> No_scaled; 00067 }; 00068 00069 template<typename F, typename Z, typename V, typename W> 00070 struct implementation<F,Z,matrix_aligned<V,W> >: 00071 public implementation<F,Z,V> {}; 00072 00073 template<typename Z, typename V, typename W> 00074 struct implementation<vector_allocate,Z,matrix_aligned<V,W> >: 00075 public implementation<vector_allocate, 00076 typename matrix_aligned<V,W>::Vec> {}; 00077 00078 /****************************************************************************** 00079 * Helpers to be specialized on aligned matrices hardware data 00080 ******************************************************************************/ 00081 00082 template<typename V, typename W, 00083 typename Op, typename D, typename S1, typename S2> 00084 struct mat_mul_aligned_helper { 00085 static inline void 00086 mul (D* d, const S1* s1, const S2* s2, 00087 nat r, nat rr, nat l, nat ll, nat c, nat cc) 00088 { 00089 typedef implementation<matrix_multiply,V> Mat; 00090 Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc); 00091 } 00092 }; 00093 00094 /****************************************************************************** 00095 * Aligned matrix multiplication 00096 ******************************************************************************/ 00097 00098 template<typename Z, typename V, typename W> 00099 struct implementation<matrix_multiply_base,Z,matrix_assume_aligned<V,W> >: 00100 public implementation<matrix_linear,Z> 00101 { 00102 template<typename Op, typename D, typename S1, typename S2> 00103 static inline void 00104 mul (D* d, const S1* s1, const S2* s2, 00105 nat r, nat rr, nat l, nat ll, nat c, nat cc) 00106 { 00107 mat_mul_aligned_helper<V,W,Op,D,S1,S2>:: 00108 mul (d, s1, s2, r, rr, l, ll, c, cc); 00109 } 00110 }; // implementation<matrix_multiply_base,Z,matrix_assume_aligned<V,W> > 00111 00112 /****************************************************************************** 00113 * Matrix multiplication, not necessarily aligned 00114 ******************************************************************************/ 00115 00116 template<typename Z, typename V, typename W> 00117 struct implementation<matrix_multiply_base,Z,matrix_aligned<V,W> >: 00118 public implementation<matrix_linear,Z> 00119 { 00120 typedef implementation<matrix_multiply,V> Mat; 00121 typedef implementation<matrix_multiply,matrix_assume_aligned<V,W> > AMat; 00122 typedef implementation<vector_allocate, 00123 typename matrix_aligned<V,W>::Vec> Vec; 00124 00125 template<typename Op, typename D, typename S1, typename S2> 00126 static inline void 00127 mul (D* d, const S1* s1, const S2* s2, 00128 nat r, nat rr, nat l, nat ll, nat c, nat cc) 00129 { 00130 if (r == 0) return; 00131 if (mask_helper<D>::len != 16 || mask_helper<S1>::len != 16) { 00132 Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc); 00133 return; 00134 } 00135 nat ar= Vec::template vec_floor_aligned_size<D> (r); 00136 nat sr= r - ar; 00137 nat arr= Vec::template vec_floor_aligned_size<D> (rr); 00138 if (sr != 0) { 00139 if (ar != 0) mul<Op> (d, s1, s2, ar, rr, l, ll, c, cc); 00140 Mat::template mul<Op> (d + Mat::index (ar, 0, rr, cc), 00141 s1 + Mat::index (ar, 0, rr, ll), s2, 00142 sr, rr, l, ll, c, cc); 00143 // NOTE: row times matrix can be further optimized for SIMD instructions 00144 } 00145 else if (! Vec::template vec_is_aligned (d) || 00146 ! Vec::template vec_is_aligned (s1) || 00147 arr != rr) { 00148 D* xd = mmx_new<D> (r * c); 00149 S1* xs1= mmx_new<S1> (r * l); 00150 S2* xs2= mmx_new<S2> (l * c); 00151 Mat::template mat_unary_stride<id_op> 00152 (xs1, Mat::index (1, 0, r , l ), Mat::index (0, 1, r , l ), 00153 s1 , Mat::index (1, 0, rr, ll), Mat::index (0, 1, rr, ll), r, l); 00154 Mat::template mat_unary_stride<id_op> 00155 (xs2, Mat::index (1, 0, l , c ), Mat::index (0, 1, l , c ), 00156 s2 , Mat::index (1, 0, ll, cc), Mat::index (0, 1, ll, cc), l, c); 00157 AMat::template mul<Op> (xd, xs1, xs2, r, r, l, l, c, c); 00158 Mat::template mat_unary_stride<typename Op::nomul_op> 00159 (d , Mat::index (1, 0, rr, cc), Mat::index (0, 1, rr, cc), 00160 xd, Mat::index (1, 0, r , c ), Mat::index (0, 1, r , c ), r, c); 00161 mmx_delete<D> (xd, r * c); 00162 mmx_delete<S1> (xs1, r * l); 00163 mmx_delete<S2> (xs2, l * c); 00164 } 00165 else 00166 AMat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, c); 00167 } 00168 00169 }; // implementation<matrix_multiply_base,Z,matrix_aligned<V,W> > 00170 00171 } // namespace mmx 00172 #endif // __MMX__MATRIX_ALIGNED__HPP