algebramix_doc 0.3
|
00001 00002 /****************************************************************************** 00003 * MODULE : matrix_naive.hpp 00004 * DESCRIPTION: Naive matrix operations 00005 * COPYRIGHT : (C) 2007 J. der Hoeven and G. Lecerf and O. Ruatta 00006 ******************************************************************************* 00007 * This software falls under the GNU general public license and comes WITHOUT 00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00009 * If you don't have this file, write to the Free Software Foundation, Inc., 00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00011 ******************************************************************************/ 00012 00013 #ifndef __MMX__MATRIX_NAIVE__HPP 00014 #define __MMX__MATRIX_NAIVE__HPP 00015 #include <basix/vector.hpp> 00016 00017 namespace mmx { 00018 #define TMPL template<typename C> 00019 00020 /****************************************************************************** 00021 * Naive variant for matrices 00022 ******************************************************************************/ 00023 00024 struct matrix_naive { 00025 typedef vector_naive Vec; // Matrices as vectors 00026 typedef matrix_naive Naive; // Naive variant 00027 typedef matrix_naive Positive; // Multiplication without subtractions 00028 typedef matrix_naive No_aligned; // Multiplication without memory alignement 00029 typedef matrix_naive No_simd; // Variant without SIMD instructions 00030 typedef matrix_naive No_thread; // Variant without threads 00031 typedef matrix_naive No_scaled; // Variant without preconditioning 00032 }; 00033 00034 template<typename C> 00035 struct matrix_variant_helper { 00036 typedef matrix_naive MV; 00037 }; 00038 00039 /****************************************************************************** 00040 * Matrix defaults 00041 ******************************************************************************/ 00042 00043 struct matrix_defaults {}; 00044 00045 template<typename V> 00046 struct implementation<matrix_defaults,V,matrix_naive> { 00047 static const nat def_rows = 0; 00048 static const nat def_cols = 0; 00049 static const nat init_rows= 1; 00050 static const nat init_cols= 1; 00051 }; // implementation<matrix_defaults,V,matrix_naive> 00052 00053 /****************************************************************************** 00054 * Inheritance from vectors 00055 ******************************************************************************/ 00056 00057 template<typename V> 00058 struct implementation<vector_defaults,V,matrix_naive>: 00059 public implementation<vector_defaults,V,typename V::Vec> {}; 00060 00061 template<typename V> 00062 struct implementation<vector_allocate,V,matrix_naive>: 00063 public implementation<vector_allocate,V,typename V::Vec> {}; 00064 00065 template<typename V> 00066 struct implementation<vector_abstractions,V,matrix_naive>: 00067 public implementation<vector_abstractions,V,typename V::Vec> {}; 00068 00069 template<typename V> 00070 struct implementation<vector_abstractions_stride,V,matrix_naive>: 00071 public implementation<vector_abstractions_stride,V,typename V::Vec> {}; 00072 00073 template<typename V> 00074 struct implementation<vector_linear,V,matrix_naive>: 00075 public implementation<vector_linear,V,typename V::Vec> {}; 00076 00077 template<typename C, typename V> inline nat 00078 aligned_size (nat r, nat c) { 00079 return aligned_size<C,V> (r * c); } 00080 00081 template<typename C> inline nat 00082 default_aligned_size (nat r, nat c) { 00083 return default_aligned_size<C> (r * c); } 00084 00085 /****************************************************************************** 00086 * Vectorial operations 00087 ******************************************************************************/ 00088 00089 struct matrix_vectorial {}; 00090 00091 template<typename V> 00092 struct implementation<matrix_vectorial,V,matrix_naive>: 00093 public implementation<matrix_defaults,V> 00094 { 00095 typedef implementation<vector_linear,V> Vec; 00096 00097 static inline nat 00098 index (nat row, nat col, nat rows, nat cols) { 00099 (void) cols; 00100 return col * rows + row; } 00101 00102 TMPL 00103 static inline const C& 00104 entry (const C* m, nat row, nat col, nat rows, nat cols) { 00105 return * (m + index (row, col, rows, cols)); } 00106 00107 /* 00108 static inline nat 00109 index (nat row, nat col, nat rows, nat cols) { 00110 (void) rows; 00111 return row * cols + col; } 00112 */ 00113 00114 template<typename Op, typename T, typename C> static inline void 00115 mat_unary_stride (T* dest, nat dest_rs, nat dest_cs, 00116 const C* src, nat src_rs, nat src_cs, 00117 nat rows, nat cols) { 00118 for (; cols != 0; dest += dest_cs, src += src_cs, cols--) 00119 Vec::template vec_unary_stride<Op> (dest, dest_rs, src, src_rs, rows); } 00120 00121 template<typename Op, typename T, typename C1, typename C2> static inline void 00122 mat_binary_stride (T* dest, nat dest_rs, nat dest_cs, 00123 const C1* src1, nat src1_rs, nat src1_cs, 00124 const C2* src2, nat src2_rs, nat src2_cs, 00125 nat rows, nat cols) { 00126 for (; cols != 0; dest += dest_cs, src1 += src1_cs, src2 += src2_cs, cols--) 00127 Vec::template vec_binary_stride<Op> 00128 (dest, dest_rs, src1, src1_rs, src2, src2_rs, rows); } 00129 00130 TMPL static inline void 00131 clear (C* dest, nat r, nat c) { 00132 Vec::clear (dest, r * c); 00133 } 00134 00135 TMPL static void 00136 set (C* dest, const C& c, nat rows, nat cols) { 00137 clear (dest, rows, cols); 00138 nat m= min (rows, cols), plus= index (1, 1, rows, cols); 00139 while (m != 0) { 00140 *dest= c; 00141 dest += plus; m--; } } 00142 00143 TMPL static void 00144 get_range (C* d, const C* s, nat r1, nat c1, nat r2, nat c2, nat r, nat c) { 00145 nat rr= r2 - r1, cc = c2 - c1; 00146 mat_unary_stride<id_op> 00147 (d, index (1, 0, rr, cc), index (0, 1, rr, cc), 00148 s + index (r1, c1, r, c), index (1, 0, r, c), index (0, 1, r, c), 00149 rr, cc); 00150 } 00151 00152 TMPL static void 00153 clear_range (C* d, nat r1, nat c1, nat r2, nat c2, nat r, nat c) { 00154 d += index (r1, c1, r, c); 00155 C* dd= d; 00156 nat rp= index (1, 0, r, c), cp= index (0, 1, r, c); 00157 for (nat j=0; j<(c2-c1); j++, dd += cp, d = dd) 00158 for (nat i=0; i<(r2-r1); i++, d += rp) 00159 clear_op::set_op (*d); 00160 } 00161 00162 TMPL static void 00163 set_range (C* d, const C& s, nat r1, nat c1, nat r2, nat c2, nat r, nat c) { 00164 d += index (r1, c1, r, c); 00165 C* dd= d; 00166 nat rp= index (1, 0, r, c), cp= index (0, 1, r, c); 00167 for (nat j=0; j<(c2-c1); j++, dd += cp, d = dd) 00168 for (nat i=0; i<(r2-r1); i++, d += rp) 00169 if (i == j) *d = s; 00170 else clear_op::set_op (*d); 00171 } 00172 00173 TMPL static void 00174 copy (C* dest, const C* s, nat r, nat c) { 00175 Vec::copy (dest, s, r * c); } 00176 00177 TMPL static void 00178 transpose (C* dest, const C* src, nat rows, nat cols) { 00179 mat_unary_stride<id_op> 00180 (dest, index (0, 1, cols, rows), index (1, 0, cols, rows), 00181 src, index (1, 0, rows, cols), index (0, 1, rows, cols), 00182 rows, cols); } 00183 00184 TMPL static inline void 00185 print (const port& out, const C* s, nat rows, nat cols) { 00186 out << "[ "; 00187 for (nat i=0; i < rows; i++) { 00188 for (nat j=0; j < cols; j++) { 00189 out << * (s + index (i, j, rows, cols)); 00190 if (j + 1 != cols) out << ", "; 00191 } 00192 if (i + 1 != rows) out << ";" << lf; 00193 } 00194 out << " ]"; 00195 } 00196 00197 }; // implementation<matrix_vectorial,V,matrix_naive> 00198 00199 /****************************************************************************** 00200 * Operations on rows and columns 00201 ******************************************************************************/ 00202 00203 struct matrix_linear {}; 00204 00205 template<typename V> 00206 struct implementation<matrix_linear,V,matrix_naive>: 00207 public implementation<matrix_vectorial,V> 00208 { 00209 typedef implementation<matrix_vectorial,V> Mat; 00210 typedef implementation<vector_linear,V> Vec; 00211 00212 template<typename Op, typename C, typename X> static inline void 00213 col_unary_scalar (C* dest, const X& x, nat c1, nat r, nat rr, nat cc) { 00214 Vec::template vec_unary_scalar_stride<Op> 00215 (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc), x, r); 00216 } 00217 00218 template<typename Op, typename C, typename X> static inline void 00219 col_binary_scalar (C* dest, const X& x, nat c1, nat c2, nat r, 00220 nat rr, nat cc) { 00221 Vec::template vec_binary_scalar_stride<Op> 00222 (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc), 00223 dest + Mat::index (0, c2, rr, cc), Mat::index (1, 0, rr, cc), x, r); 00224 } 00225 00226 template<typename Op, typename C> static inline void 00227 col_binary_combine (C* dest, nat c1, nat c2, nat r, nat rr, nat cc) { 00228 Vec::template vec_binary_combine_stride<Op> 00229 (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc), 00230 dest + Mat::index (0, c2, rr, cc), Mat::index (1, 0, rr, cc), r); 00231 } 00232 00233 template<typename Op, typename C, typename X> static inline void 00234 row_unary_scalar (C* dest, const X& x, nat r1, nat c, nat rr, nat cc) { 00235 Vec::template vec_unary_scalar_stride<Op> 00236 (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc), x, c); 00237 } 00238 00239 template<typename Op, typename C, typename X> static inline void 00240 row_binary_scalar (C* dest, const X& x, nat r1, nat r2, nat c, 00241 nat rr, nat cc) { 00242 Vec::template vec_binary_scalar_stride<Op> 00243 (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc), 00244 dest + Mat::index (r2, 0, rr, cc), Mat::index (0, 1, rr, cc), x, c); 00245 } 00246 00247 template<typename Op, typename C> static inline void 00248 row_binary_combine (C* dest, nat r1, nat r2, nat c, nat rr, nat cc) { 00249 Vec::template vec_binary_combine_stride<Op> 00250 (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc), 00251 dest + Mat::index (r2, 0, rr, cc), Mat::index (0, 1, rr, cc), c); 00252 } 00253 00254 TMPL static void col_mul (C* s, const C& sc, nat i, nat r, nat c) { 00255 ASSERT (i<c, "out of range"); 00256 col_unary_scalar<rmul_op> (s, sc, i, r, r, c); } 00257 TMPL static void col_div (C* s, const C& sc, nat i, nat r, nat c) { 00258 ASSERT (i<c, "out of range"); 00259 col_unary_scalar<rdiv_op> (s, sc, i, r, r, c); } 00260 TMPL static void row_mul (C* s, const C& sc, nat i, nat r, nat c) { 00261 ASSERT (i<r, "out of range"); 00262 row_unary_scalar<rmul_op> (s, sc, i, c, r, c); } 00263 TMPL static void row_div (C* s, const C& sc, nat i, nat r, nat c) { 00264 ASSERT (i<r, "out of range"); 00265 row_unary_scalar<rdiv_op> (s, sc, i, c, r, c); } 00266 00267 TMPL static void col_sweep (C* s, nat i, nat j, const C& sc, nat r, nat c) { 00268 ASSERT (i<c && j<c, "out of range"); 00269 col_binary_scalar<mul_sub_op> (s, sc, i, j, r, r, c); } 00270 TMPL static void row_sweep (C* s, nat i, nat j, const C& sc, nat r, nat c) { 00271 ASSERT (i<r && j<r, "out of range"); 00272 row_binary_scalar<mul_sub_op> (s, sc, i, j, c, r, c); } 00273 TMPL static void col_sweep (C* s, nat i, nat j, nat r, const C& sc, 00274 nat rr, nat cc) { 00275 col_binary_scalar<mul_sub_op> (s, sc, i, j, r, rr, cc); } 00276 TMPL static void row_sweep (C* s, nat i, nat j, nat c, const C& sc, 00277 nat rr, nat cc) { 00278 row_binary_scalar<mul_sub_op> (s, sc, i, j, c, rr, cc); } 00279 00280 TMPL static void col_swap (C* s, nat i, nat j, nat r, nat c) { 00281 ASSERT (i<c && j<c, "out of range"); 00282 col_binary_combine<swap_op> (s, i, j, r, r, c); } 00283 TMPL static void row_swap (C* s, nat i, nat j, nat r, nat c) { 00284 ASSERT (i<r && j<r, "out of range"); 00285 row_binary_combine<swap_op> (s, i, j, c, r, c); } 00286 TMPL static void col_swap (C* s, nat i, nat j, nat r, nat rr, nat cc) { 00287 col_binary_combine<swap_op> (s, i, j, r, rr, cc); } 00288 TMPL static void row_swap (C* s, nat i, nat j, nat c, nat rr, nat cc) { 00289 row_binary_combine<swap_op> (s, i, j, c, rr, cc); } 00290 00291 TMPL static void 00292 row_combine_sub (C* s, nat i, const C& si, nat j, const C& sj, nat r, nat c) { 00293 ASSERT (i<r && j<r, "out of range"); 00294 C* iti= s + Mat::index (i, 0, r, c); 00295 C* itj= s + Mat::index (j, 0, r, c); 00296 nat plus= Mat::index (0, 1, r, c); 00297 while (c>0) { 00298 *iti= si * (*iti) - sj * (*itj); 00299 iti += plus; itj += plus; c--; } } 00300 00301 TMPL static void 00302 col_combine_sub (C* s, nat i, const C& si, nat j, const C& sj, nat r, nat c) { 00303 ASSERT (i<c && j<c, "out of range"); 00304 C* iti= s + Mat::index (0, i, r, c); 00305 C* itj= s + Mat::index (0, j, r, c); 00306 nat plus= Mat::index (1, 0, r, c); 00307 while (r>0) { 00308 *iti= si * (*iti) - sj * (*itj); 00309 iti += plus; itj += plus; r--; } } 00310 00311 TMPL static bool 00312 row_is_zero (const C* s, nat i, nat r, nat c) { 00313 return Vec::template vec_binary_test_scalar_stride<equal_op> 00314 (s + Mat::index (i, 0, r, c), Mat::index (0, 1, r, c), C (0), c); } 00315 00316 TMPL static bool 00317 col_is_zero (const C* s, nat j, nat r, nat c) { 00318 return Vec::template vec_binary_test_scalar_stride<equal_op> 00319 (s + Mat::index (0, j, r, c), Mat::index (1, 0, r, c), C (0), r); } 00320 00321 }; // implementation<matrix_linear,V,matrix_naive> 00322 00323 /****************************************************************************** 00324 * Multiplication 00325 ******************************************************************************/ 00326 00327 template<typename V> 00328 struct matrix_multiply_threshold {}; 00329 00330 struct matrix_multiply_base {}; 00331 struct matrix_multiply {}; 00332 00333 template<typename V> 00334 struct implementation<matrix_multiply_base,V,matrix_naive>: 00335 public implementation<matrix_linear,V> 00336 { 00337 typedef implementation<matrix_linear,V> Mat; 00338 00339 template<typename Op, typename C> 00340 struct clear_helper { 00341 static inline void op (C* d, nat r, nat rr, nat c, nat cc) { 00342 clear_range (d, 0, 0, r, c, rr, cc); } 00343 }; 00344 00345 template<typename C> 00346 struct clear_helper<mul_add_op,C> { 00347 static inline void op (C* d, nat r, nat rr, nat c, nat cc) {} 00348 }; 00349 00350 template<typename Op, typename C> static inline void 00351 clr (C* d, nat r, nat rr, nat c, nat cc) { 00352 clear_helper<Op,C>::op (d, r, rr, c, cc); 00353 } 00354 00355 template<typename Op, typename D, typename S1, typename S2> 00356 static inline void 00357 mul (D* d, const S1* s1, const S2* s2, 00358 nat r, nat rr, nat l, nat ll, nat c, nat cc) { 00359 typedef typename Op::acc_op Acc; 00360 if (l == 0) clr<Op> (d, r, rr, c, cc); 00361 else { 00362 nat ri, ci, li; 00363 D *dr, *dc; 00364 const S1 *s1r, *s1c; 00365 const S2 *s2r, *s2c; 00366 nat drp = Mat::index (1, 0, rr, cc), dcp = Mat::index (0, 1, rr, cc); 00367 nat s1rp= Mat::index (1, 0, rr, ll), s1cp= Mat::index (0, 1, rr, ll); 00368 nat s2rp= Mat::index (1, 0, ll, cc), s2cp= Mat::index (0, 1, ll, cc); 00369 for (ri= r, dr= d, s1r= s1; ri!=0; ri--, dr += drp, s1r += s1rp) 00370 for (ci= c, dc= dr, s2c= s2; ci!=0; ci--, dc += dcp, s2c += s2cp) { 00371 D tmp= *dc; 00372 for (li= l, s1c= s1r, s2r= s2c; li==l; li--, s1c += s1cp, s2r += s2rp) 00373 Op ::set_op (tmp, *s1c, *s2r); 00374 for ( ; li!=0; li--, s1c += s1cp, s2r += s2rp) 00375 Acc::set_op (tmp, *s1c, *s2r); 00376 *dc= tmp; 00377 } 00378 } 00379 } 00380 00381 }; // implementation<matrix_multiply_base,V,matrix_naive> 00382 00383 template<typename V> 00384 struct implementation<matrix_multiply,V,matrix_naive>: 00385 public implementation<matrix_multiply_base,V> 00386 { 00387 typedef implementation<matrix_multiply_base,V> Mat; 00388 00389 template<typename Op, typename D, typename S1, typename S2> 00390 static inline void 00391 mul (D* d, const S1* s1, const S2* s2, 00392 nat r, nat rr, nat l, nat ll, nat c, nat cc) { 00393 Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc); 00394 } 00395 00396 template<typename D, typename S1, typename S2> static inline void 00397 mul (D* dest, const S1* m1, const S2* m2, nat r, nat l, nat c) { 00398 Mat::template mul<mul_op> (dest, m1, m2, r, r, l, l, c, c); 00399 } 00400 00401 }; // implementation<matrix_multiply,V,matrix_naive> 00402 00403 /****************************************************************************** 00404 * Matrix product iteration 00405 ******************************************************************************/ 00406 00407 struct matrix_iterate {}; 00408 00409 template<typename V> 00410 struct implementation<matrix_iterate,V,matrix_naive>: 00411 public implementation<matrix_multiply,V> 00412 { 00413 typedef implementation<matrix_multiply,V> Mat; 00414 00415 TMPL static inline void 00416 iterate_mul (C** d, const C* s, const C* x, 00417 nat rs, nat cs, nat rx, nat cx, nat n) { 00418 // return s^0 x, s^1 x,..., s^{n-1} x in d 00419 // d must have allocated size n * rs * cx 00420 VERIFY (cs == rx && rs == cs, "sizes do not match"); 00421 if (rs == 0) return; 00422 Mat::get_range (d[0], x, 0, 0, rx, cx, rx, cx); 00423 for (nat i = 1; i < n; i++) 00424 Mat::mul (d[i], s, d[i-1], rs, cs, cx); 00425 } 00426 00427 TMPL static inline void 00428 project_iterate_mul (C** d, const C* y, const C* s, const C* x, 00429 nat ry, nat cy, nat rs, nat cs, nat rx, nat cx, nat n) { 00430 // return y s^0 x, y s^1 x,..., y s^{n-1} x in d 00431 VERIFY (cy == rs && rs == cs && cs == rx, "sizes do not match"); 00432 if (n == 0) return; 00433 nat l= aligned_size<C,V> (rx * cx); 00434 C* buf= mmx_new<C> (l << 1); 00435 C* t1= buf, * t2= t1 + l; 00436 Mat::get_range (t1, x, 0, 0, rx, cx, rx, cx); 00437 Mat::mul (d[0], y, t1, ry, cy, cx); 00438 for (nat i = 1; i < n; i++) { 00439 Mat::mul (t2, s, t1, rs, cs, cx); 00440 Mat::mul (d[i], y, t2, ry, cy, cx); 00441 swap (t1, t2); 00442 } 00443 mmx_delete<C> (buf, l << 1); 00444 } 00445 00446 }; // implementation<matrix_iterate,V,matrix_naive> 00447 00448 /****************************************************************************** 00449 * Low level action of permutations on matrices 00450 ******************************************************************************/ 00451 00452 struct matrix_permute {}; 00453 00454 template<typename V> 00455 struct implementation<matrix_permute,V,matrix_naive>: 00456 public implementation<matrix_multiply,V> 00457 { 00458 typedef implementation<matrix_multiply,V> Mat; 00459 typedef implementation<vector_linear,V> Vec; 00460 00461 TMPL static void 00462 col_permute (C* dest, const C* m, const nat* p, nat r, nat c) { 00463 // Permute columns of r x c matrix according to length c permutation p 00464 nat rs= Mat::index (1, 0, r, c); 00465 for (nat i=0; i<c; i++) 00466 Vec::template vec_unary_stride<id_op> 00467 (dest + Mat::index (0, i, r, c), rs, 00468 m + Mat::index (0, p[i], r, c), rs, r); 00469 } 00470 00471 TMPL static void 00472 row_permute (C* dest, const C* m, const nat* p, nat r, nat c) { 00473 // Permute rows of r x c matrix according to length c permutation p 00474 nat cs= Mat::index (0, 1, r, c); 00475 for (nat i=0; i<r; i++) 00476 Vec::template vec_unary_stride<id_op> 00477 (dest + Mat::index (i, 0, r, c), cs, 00478 m + Mat::index (p[i], 0, r, c), cs, c); 00479 } 00480 00481 TMPL static void 00482 col_permute (C* m, const nat* p, nat r, nat c, bool inv= false) { 00483 // Permute columns of r x c matrix according to length c permutation p 00484 // Apply the inverse permutation if inv is true 00485 nat q[c]; 00486 if (inv) for (nat i=0; i<c; i++) q[i]= p[i]; 00487 else for (nat i=0; i<c; i++) q[p[i]]= i; 00488 for (nat i=0; i<c; ) { 00489 if (q[i] == i) i++; 00490 else { 00491 nat j= q[i], k= q[j]; 00492 Mat::col_swap (m, i, j, r, c); 00493 q[i]= k; q[j]= j; 00494 } 00495 } 00496 } 00497 00498 TMPL static void 00499 row_permute (C* m, const nat* p, nat r, nat c, bool inv= false) { 00500 // Permute rows of r x c matrix according to length c permutation p 00501 // Apply the inverse permutation if inv is true 00502 nat q[r]; 00503 if (inv) for (nat i=0; i<c; i++) q[i]= p[i]; 00504 else for (nat i=0; i<c; i++) q[p[i]]= i; 00505 for (nat i=0; i<r; ) { 00506 if (q[i] == i) i++; 00507 else { 00508 nat j= q[i], k= q[j]; 00509 Mat::row_swap (m, i, q[i], r, c); 00510 q[i]= k; q[j]= j; 00511 } 00512 } 00513 } 00514 00515 }; // implementation<matrix_permute,V,matrix_naive> 00516 00517 /****************************************************************************** 00518 * Helper for pivot quality 00519 ******************************************************************************/ 00520 00521 template<typename C> 00522 struct pivot_helper { 00523 static inline bool 00524 better (const C& x1, const C& x2) { return false; } 00525 }; 00526 00527 template<typename C> inline bool 00528 better_pivot (const C& x1, const C& x2) { 00529 return pivot_helper<C>::better (x1, x2); 00530 } 00531 00532 /****************************************************************************** 00533 * Echelon forms 00534 ******************************************************************************/ 00535 00536 struct matrix_echelon {}; 00537 00538 template<typename V> 00539 struct implementation<matrix_echelon,V,matrix_naive>: 00540 public implementation<matrix_multiply,V> 00541 { 00542 typedef implementation<matrix_multiply,V> Mat; 00543 00544 private: 00545 00546 TMPL static void 00547 col_echelon (C* m, C* k, nat ri, nat ci, nat rf, nat cf, nat rm, nat cm, 00548 C& num, C& den, bool reduced, nat* permut) 00549 { 00550 // Perform column pivoting from (ri, ci) to (rf, cf). 00551 // In return m is echelonized and k is the transformation matrix, 00552 // so that the computed m is the product of m * k. 00553 // If k is NULL then it is not computed. Otherwise k must have 00554 // allocated size at least cm * cm. If the input matrix is regular, 00555 // then num/den yields the determinant. We have den=1 if the number 00556 // of row swaps is even and den=-1 otherwise. Note that the first 00557 // entries of the echelonized rows need not to be one. 00558 // 00559 // if not NULL, permut returns the permutation performed on the columns 00560 VERIFY ((ri < rm) && (ci < cm) && (rf <= rm) && (cf <= cm), "out of range"); 00561 format<C> fm= get_format (m[0]); 00562 num= promote (1, fm); den= promote (1, fm); 00563 nat cb= ci; 00564 if (permut != NULL) for (nat i= 0; i < cm; i++) permut[i]= i; 00565 while (ri < rf && ci < cf) { 00566 // search pivot 00567 nat i= ri, j= ci; 00568 nat best_index; 00569 C best_val= promote (0, fm); 00570 for (i= ri; i<rf; i++) { 00571 best_index= cf; 00572 for (j= ci; j<cf; j++) { 00573 C next_val= Mat::entry (m, i, j, rm, cm); 00574 if (next_val != promote (0, fm)) 00575 if (best_index == cf || better_pivot (next_val, best_val)) { 00576 best_index= j; 00577 best_val= next_val; 00578 } 00579 } 00580 if (best_index != cf) { j= best_index; break; } 00581 } 00582 00583 if (i<rf && j<cf) { 00584 // put pivot on the left 00585 if (j != ci) { 00586 if (k != NULL) Mat::col_swap (k, ci, j, cm, cm); 00587 Mat::col_swap (m, ci, j, rm, cm); 00588 den= -den; 00589 if (permut != NULL) swap (permut[j], permut[ci]); 00590 } 00591 ri= i; 00592 // column reduction 00593 C p= Mat::entry (m, ri, ci, rm, cm); 00594 num *= p; 00595 for (nat index= reduced ? cb : ci + 1; index < cf; index++) { 00596 if (index == ci) continue; 00597 C t= Mat::entry (m, ri, index, rm, cm); 00598 if (k != NULL) Mat::col_sweep (k, index, ci, t/p, cm, cm); 00599 Mat::col_sweep (m, index, ci, t/p, rm, cm); 00600 } 00601 ri++; ci++; 00602 } 00603 else break; 00604 } 00605 } 00606 00607 public: 00608 00609 TMPL static inline void 00610 col_echelon (C* m, C* k, nat rm, nat cm, C& num, C& den, 00611 bool reduced= false, nat* permut= NULL) { 00612 if (k != NULL && cm != 0) Mat::set (k, promote (1, k[0]), cm, cm); 00613 col_echelon (m, k, 0, 0, rm, cm, rm, cm, num, den, reduced, permut); 00614 } 00615 00616 TMPL static inline void 00617 col_echelon (C* m, C* k, nat rm, nat cm, 00618 bool reduced= false, nat* permut= NULL) { 00619 if (k != NULL && cm != 0) Mat::set (k, promote (1, k[0]), cm, cm); 00620 C num, den; 00621 col_echelon (m, k, rm, cm, num, den, reduced, permut); 00622 } 00623 00624 }; // implementation<matrix_echelon,V,matrix_naive> 00625 00626 /****************************************************************************** 00627 * Determinant and characteristic polynomial with exact division 00628 ******************************************************************************/ 00629 00630 struct matrix_determinant {}; 00631 00632 template<typename V> 00633 struct implementation<matrix_determinant,V,matrix_naive>: 00634 public implementation<matrix_echelon,V> 00635 { 00636 typedef implementation<matrix_echelon,V> Mat; 00637 00638 TMPL static void 00639 det (C& r, const C* m, nat n) { 00640 if (n == 0) { set_as (r, 1); return; } 00641 nat len_c= aligned_size<C,V> (n * n); 00642 C* c= mmx_new<C> (len_c), * k= NULL; 00643 C num, den; 00644 Mat::copy (c, m, n, n); 00645 Mat::col_echelon (c, k, n, n, num, den); 00646 if (col_is_zero (c, n-1, n, n)) set_as (r, 0); 00647 else r= num / den; 00648 mmx_delete<C> (c, len_c); 00649 } 00650 00651 }; // implementation<matrix_determinant,V,matrix_naive> 00652 00653 /****************************************************************************** 00654 * Kernel 00655 ******************************************************************************/ 00656 00657 struct matrix_kernel {}; 00658 00659 template<typename V> 00660 struct implementation<matrix_kernel,V,matrix_naive>: 00661 public implementation<matrix_echelon,V> 00662 { 00663 typedef implementation<matrix_echelon,V> Mat; 00664 00665 TMPL static nat 00666 kernel (C* k, const C* m, nat rm, nat cm) { 00667 // return the dimension of the kernel 00668 VERIFY (rm > 0 && cm > 0, "unexpected empty matrix"); 00669 nat i, j, dim, len_c= aligned_size<C,V> (rm * cm); 00670 C* c= mmx_new<C> (len_c); 00671 Mat::copy (c, m, rm, cm); 00672 Mat::col_echelon (c, k, rm, cm); 00673 for (i=0; i<cm; i++) 00674 if (Mat::col_is_zero (c, i, rm, cm)) break; 00675 mmx_delete<C> (c, len_c); 00676 dim= cm - i; 00677 if (dim < cm) 00678 for (j=0; j < dim; j++) 00679 Mat::col_swap (k, j, i+j, cm, cm); 00680 // Note that the next columns are not filled with zeros 00681 C x; 00682 for (j=0; j < dim; j++) 00683 for (i=0; i < cm; i++) 00684 if ((x= *(k + Mat::index (i, j, cm, cm))) != 0) 00685 Mat::col_div (k, x, j, cm, cm); 00686 return dim; 00687 } 00688 00689 }; // implementation<matrix_kernel,V,matrix_naive> 00690 00691 /****************************************************************************** 00692 * Image and rank 00693 ******************************************************************************/ 00694 00695 struct matrix_image {}; 00696 00697 template<typename V> 00698 struct implementation<matrix_image,V,matrix_naive>: 00699 public implementation<matrix_echelon,V> 00700 { 00701 typedef implementation<matrix_echelon,V> Mat; 00702 00703 TMPL static nat 00704 image (C* k, const C* m, nat rm, nat cm) { 00705 // return the dimension of the image 00706 VERIFY (rm > 0 && cm > 0, "unexpected empty matrix"); 00707 nat i, len_c= aligned_size<C,V> (rm * cm); 00708 C* c= mmx_new<C> (len_c); 00709 Mat::copy (c, m, rm, cm); 00710 Mat::col_echelon (c, (C*) NULL, rm, cm); 00711 for (i=0; i<cm; i++) 00712 if (Mat::col_is_zero (c, i, rm, cm)) break; 00713 Mat::get_range (k, c, 0, 0, rm, i, rm, cm); 00714 // Note that the next columns are not filled with zeros 00715 mmx_delete<C> (c, len_c); 00716 return i; 00717 } 00718 00719 TMPL static nat 00720 rank (const C* m, nat rm, nat cm) { 00721 VERIFY (rm > 0 && cm > 0, "unexpected empty matrix"); 00722 nat i, len_c= aligned_size<C,V> (rm * cm); 00723 C* c= mmx_new<C> (len_c); 00724 Mat::copy (c, m, rm, cm); 00725 Mat::col_echelon (c, (C*) NULL, rm, cm); 00726 for (i=0; i<cm; i++) 00727 if (Mat::col_is_zero (c, i, rm, cm)) break; 00728 mmx_delete<C> (c, len_c); 00729 return i; 00730 } 00731 00732 }; // implementation<matrix_image,V,matrix_naive> 00733 00734 /****************************************************************************** 00735 * Inversion 00736 ******************************************************************************/ 00737 00738 struct matrix_invert {}; 00739 00740 template<typename V> 00741 struct implementation<matrix_invert,V,matrix_naive>: 00742 public implementation<matrix_echelon,V> 00743 { 00744 typedef implementation<matrix_echelon,V> Mat; 00745 00746 TMPL static void 00747 invert_lower_triangular (C* inv, const C* m, nat n) { 00748 for (nat i=0; i<n; i++) { 00749 C d= Mat::entry (m, i, i, n, n); 00750 ASSERT (d != 0, "non-invertible matrix"); 00751 for (nat k=0; k<=i; k++) { 00752 C sum= promote (0, d); 00753 for (nat j=k; j<i; j++) 00754 sum -= Mat::entry (m, i, j, n, n) * Mat::entry (inv, j, k, n, n); 00755 if (i == k) sum += promote (1, d); 00756 inv[Mat::index (i, k, n, n)]= sum / d; 00757 } 00758 for (nat k=i+1; k<n; k++) 00759 inv[Mat::index (i, k, n, n)]= promote (0, d); 00760 } 00761 } 00762 00763 TMPL static void 00764 invert (C* k, const C* m, nat n) { 00765 nat l= aligned_size<C,V> (n * n); 00766 C* a= mmx_new<C> (l << 1); 00767 C* b= a + l; 00768 Mat::copy (k, m, n, n); 00769 Mat::col_echelon (k, b, n, n); // m * b = k, k lower triangular 00770 //mmout << "K\n----------\n"; Mat::print (mmout, k, n, n); mmout << "\n"; 00771 //mmout << "B\n----------\n"; Mat::print (mmout, b, n, n); mmout << "\n"; 00772 invert_lower_triangular (a, k, n); 00773 //mmout << "A\n----------\n"; Mat::print (mmout, a, n, n); mmout << "\n"; 00774 Mat::mul (k, b, a, n, n, n); 00775 mmx_delete<C> (a, l << 1); 00776 } 00777 00778 }; // implementation<matrix_invert,V,matrix_naive> 00779 00780 /****************************************************************************** 00781 * Gram Schmidt orthogonalization and related decompositions 00782 ******************************************************************************/ 00783 00784 struct matrix_orthogonalization {}; 00785 00786 template<typename V> 00787 struct implementation<matrix_orthogonalization,V,matrix_naive>: 00788 public implementation<matrix_multiply,V> 00789 { 00790 typedef implementation<matrix_linear,V> Mat; 00791 typedef implementation<vector_linear,V> Vec; 00792 00793 TMPL static inline C 00794 row_inn_prod (C* m, nat i, nat j, nat r, nat c) { 00795 // dot product of rows i and j 00796 return Vec::template vec_binary_big_stride<mul_add_op> ( 00797 m + Mat::index (i, 0, r, c), Mat::index (0, 1, r, c), 00798 m + Mat::index (j, 0, r, c), Mat::index (0, 1, r, c), c); 00799 } 00800 00801 TMPL static inline C 00802 col_inn_prod (C* m, nat i, nat j, nat r, nat c) { 00803 // dot product of columns i and j 00804 return Vec::template vec_binary_big_stride<mul_add_op> ( 00805 m + Mat::index (0, i, r, c), Mat::index (1, 0, r, c), 00806 m + Mat::index (0, j, r, c), Mat::index (1, 0, r, c), r); 00807 } 00808 00809 TMPL static void 00810 row_orthogonalize (C* m, nat r, nat c, C* n) { 00811 // In place Gram Schmidt orthogonalization of the row vectors of 00812 // m. The vector n contains the squares of the norms of the row 00813 // vectors of the computed m, ie n[i]= ||m[i,.]||^2. 00814 for (nat i= 0; i < r; i++) { 00815 for (nat j= 0; j < i; j++) { 00816 if (n[j] == 0) continue; 00817 C mu= row_inn_prod (m, i, j, r, c) / n[j]; 00818 Mat::row_combine_sub (m, i, C(1), j, mu, r, c); 00819 } 00820 n[i]= row_inn_prod (m, i, i, r, c); 00821 } 00822 } 00823 00824 TMPL static void 00825 col_orthogonalize (C* m, nat r, nat c, C* n) { 00826 // Idem for columns 00827 for (nat i= 0; i < c; i++) { 00828 for (nat j= 0; j < i; j++) { 00829 if (n[j] == 0) continue; 00830 C mu= col_inn_prod (m, i, j, r, c) / n[j]; 00831 Mat::col_combine_sub (m, i, C(1), j, mu, r, c); 00832 } 00833 n[i]= col_inn_prod (m, i, i, r, c); 00834 } 00835 } 00836 00837 TMPL static void 00838 row_orthogonalize (C* m, nat r, nat c, C* l, C* n) { 00839 // In place Gram Schmidt orthogonalization of the column vectors of 00840 // m. The matrix l is lower triangular. In return m * l equals the 00841 // input matrix m. The vector n contains the squares of the norms of 00842 // the column vectors of the computed m, ie n[i]= ||m[i,.]||^2. 00843 for (nat i= 0; i < r; i++) { 00844 for (nat j= 0; j < i; j++) { 00845 if (n[j] == 0) continue; 00846 C mu= row_inn_prod (m, i, j, r, c) / n[j]; 00847 Mat::row_combine_sub (m, i, C(1), j, mu, r, c); 00848 *(l + Mat::index (i, j, r, r))= mu; 00849 } 00850 n[i]= row_inn_prod (m, i, i, r, c); 00851 *(l + Mat::index (i, i, r, r))= C(1); 00852 } 00853 } 00854 00855 TMPL static void 00856 col_orthogonalize (C* m, nat r, nat c, C* l, C* n) { 00857 // Idem for columns. 00858 for (nat i= 0; i < c; i++) { 00859 for (nat j= 0; j < i; j++) { 00860 if (n[j] == 0) continue; 00861 C mu= col_inn_prod (m, i, j, r, c) / n[j]; 00862 Mat::col_combine_sub (m, i, C(1), j, mu, r, c); 00863 *(l + Mat::index (j, i, c, c))= mu; 00864 } 00865 n[i]= col_inn_prod (m, i, i, r, c); 00866 *(l + Mat::index (i, i, c, c))= C(1); 00867 } 00868 } 00869 00870 TMPL static void 00871 row_orthonormalize (C* m, nat r, nat c) { 00872 // In place Gram Schmidt orthonormalization of the row vectors of m. 00873 for (nat i= 0; i < r; i++) { 00874 for (nat j= 0; j < i; j++) 00875 Mat::row_combine_sub (m, i, C(1), j, row_inn_prod (m, i, j, r, c), r, c); 00876 C a= row_inn_prod (m, i, i, r, c); 00877 if (a != 0) Mat::row_div (m, sqrt (a), i, r, c); 00878 } 00879 } 00880 00881 TMPL static void 00882 col_orthonormalize (C* m, nat r, nat c) { 00883 // Idem for columns 00884 for (nat i= 0; i < c; i++) { 00885 for (nat j= 0; j < i; j++) 00886 Mat::col_combine_sub (m, i, C(1), j, col_inn_prod (m, i, j, r, c), r, c); 00887 C a= col_inn_prod (m, i, i, r, c); 00888 if (a != 0) Mat::col_div (m, sqrt (a), i, r, c); 00889 } 00890 } 00891 00892 TMPL static void 00893 row_orthonormalize (C* m, nat r, nat c, C* l) { 00894 // In place Gram Schmidt orthonormalization of the row vectors of m. 00895 // The matrix l is lower triangular. In return m * l equals the 00896 // input matrix m. 00897 for (nat i= 0; i < r; i++) { 00898 for (nat j= 0; j < i; j++) { 00899 C mu= row_inn_prod (m, i, j, r, c); 00900 Mat::row_combine_sub (m, i, C(1), j, mu, r, c); 00901 *(l + Mat::index (i, j, r, r))= mu; 00902 } 00903 C a= row_inn_prod (m, i, i, r, c); 00904 if (a != 0) { 00905 C b= sqrt (a); 00906 Mat::row_div (m, b, i, r, c); 00907 *(l + Mat::index (i, i, r, r))= b; 00908 } 00909 } 00910 } 00911 00912 TMPL static void 00913 col_orthonormalize (C* m, nat r, nat c, C* l) { 00914 // Idem for columns. 00915 for (nat i= 0; i < c; i++) { 00916 for (nat j= 0; j < i; j++) { 00917 C mu= col_inn_prod (m, i, j, r, c); 00918 Mat::col_combine_sub (m, i, C(1), j, mu, r, c); 00919 *(l + Mat::index (j, i, c, c))= mu; 00920 } 00921 C a= col_inn_prod (m, i, i, r, c); 00922 if (a != 0) { 00923 C b= sqrt (a); 00924 Mat::col_div (m, b, i, r, c); 00925 *(l + Mat::index (i, i, c, c))= b; 00926 } 00927 } 00928 } 00929 00930 }; // implementation<matrix_orthogonalization,V,matrix_naive> 00931 00932 #undef TMPL 00933 }//namespace mmx 00934 #endif //__MMX__MATRIX_NAIVE__HPP