html/doxygen/matrix__naive_8hpp_source.html

00001
00002 /******************************************************************************
00003 * MODULE     : matrix_naive.hpp
00004 * DESCRIPTION: Naive matrix operations
00005 * COPYRIGHT  : (C) 2007  J. der Hoeven and G. Lecerf and O. Ruatta
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012
00013 #ifndef __MMX__MATRIX_NAIVE__HPP
00014 #define __MMX__MATRIX_NAIVE__HPP
00015 #include <basix/vector.hpp>
00016
00017 namespace mmx {
00018 #define TMPL template<typename C>
00019
00020 /******************************************************************************
00021 * Naive variant for matrices
00022 ******************************************************************************/
00023
00024 struct matrix_naive {
00025   typedef vector_naive Vec;        // Matrices as vectors
00026   typedef matrix_naive Naive;      // Naive variant
00027   typedef matrix_naive Positive;   // Multiplication without subtractions
00028   typedef matrix_naive No_aligned; // Multiplication without memory alignement
00029   typedef matrix_naive No_simd;    // Variant without SIMD instructions
00030   typedef matrix_naive No_thread;  // Variant without threads
00031   typedef matrix_naive No_scaled;  // Variant without preconditioning
00032 };
00033
00034 template<typename C>
00035 struct matrix_variant_helper {
00036   typedef matrix_naive MV;
00037 };
00038
00039 /******************************************************************************
00040 * Matrix defaults
00041 ******************************************************************************/
00042
00043 struct matrix_defaults {};
00044
00045 template<typename V>
00046 struct implementation<matrix_defaults,V,matrix_naive> {
00047   static const nat def_rows = 0;
00048   static const nat def_cols = 0;
00049   static const nat init_rows= 1;
00050   static const nat init_cols= 1;
00051 }; // implementation<matrix_defaults,V,matrix_naive>
00052
00053 /******************************************************************************
00054 * Inheritance from vectors
00055 ******************************************************************************/
00056
00057 template<typename V>
00058 struct implementation<vector_defaults,V,matrix_naive>:
00059   public implementation<vector_defaults,V,typename V::Vec> {};
00060
00061 template<typename V>
00062 struct implementation<vector_allocate,V,matrix_naive>:
00063   public implementation<vector_allocate,V,typename V::Vec> {};
00064
00065 template<typename V>
00066 struct implementation<vector_abstractions,V,matrix_naive>:
00067   public implementation<vector_abstractions,V,typename V::Vec> {};
00068
00069 template<typename V>
00070 struct implementation<vector_abstractions_stride,V,matrix_naive>:
00071   public implementation<vector_abstractions_stride,V,typename V::Vec> {};
00072
00073 template<typename V>
00074 struct implementation<vector_linear,V,matrix_naive>:
00075   public implementation<vector_linear,V,typename V::Vec> {};
00076
00077 template<typename C, typename V> inline nat
00078 aligned_size (nat r, nat c) {
00079   return aligned_size<C,V> (r * c); }
00080
00081 template<typename C> inline nat
00082 default_aligned_size (nat r, nat c) {
00083   return default_aligned_size<C> (r * c); }
00084
00085 /******************************************************************************
00086 * Vectorial operations
00087 ******************************************************************************/
00088
00089 struct matrix_vectorial {};
00090
00091 template<typename V>
00092 struct implementation<matrix_vectorial,V,matrix_naive>:
00093   public implementation<matrix_defaults,V>
00094 {
00095   typedef implementation<vector_linear,V> Vec;
00096
00097 static inline nat
00098 index (nat row, nat col, nat rows, nat cols) {
00099   (void) cols;
00100   return col * rows + row; }
00101
00102 TMPL
00103 static inline const C&
00104 entry (const C* m, nat row, nat col, nat rows, nat cols) {
00105   return * (m + index (row, col, rows, cols)); }
00106
00107 /*
00108 static inline nat
00109 index (nat row, nat col, nat rows, nat cols) {
00110   (void) rows;
00111   return row * cols + col; }
00112 */
00113
00114 template<typename Op, typename T, typename C> static inline void
00115 mat_unary_stride (T* dest, nat dest_rs, nat dest_cs,
00116                   const C* src, nat src_rs, nat src_cs,
00117                   nat rows, nat cols) {
00118   for (; cols != 0; dest += dest_cs, src += src_cs, cols--)
00119     Vec::template vec_unary_stride<Op> (dest, dest_rs, src, src_rs, rows); }
00120
00121 template<typename Op, typename T, typename C1, typename C2> static inline void
00122 mat_binary_stride (T* dest, nat dest_rs, nat dest_cs,
00123                    const C1* src1, nat src1_rs, nat src1_cs,
00124                    const C2* src2, nat src2_rs, nat src2_cs,
00125                    nat rows, nat cols) {
00126   for (; cols != 0; dest += dest_cs, src1 += src1_cs, src2 += src2_cs, cols--)
00127     Vec::template vec_binary_stride<Op>
00128       (dest, dest_rs, src1, src1_rs, src2, src2_rs, rows); }
00129
00130 TMPL static inline void
00131 clear (C* dest, nat r, nat c) {
00132   Vec::clear (dest, r * c);
00133 }
00134
00135 TMPL static void
00136 set (C* dest, const C& c, nat rows, nat cols) {
00137   clear (dest, rows, cols);
00138   nat m= min (rows, cols), plus= index (1, 1, rows, cols);
00139   while (m != 0) {
00140     *dest= c;
00141     dest += plus; m--; } }
00142
00143 TMPL static void
00144 get_range (C* d, const C* s, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
00145   nat rr= r2 - r1, cc = c2 - c1;
00146   mat_unary_stride<id_op>
00147     (d, index (1, 0, rr, cc), index (0, 1, rr, cc),
00148      s + index (r1, c1, r, c), index (1, 0, r, c), index (0, 1, r, c),
00149      rr, cc);
00150 }
00151
00152 TMPL static void
00153 clear_range (C* d, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
00154   d += index (r1, c1, r, c);
00155   C* dd= d;
00156   nat rp= index (1, 0, r, c), cp= index (0, 1, r, c);
00157   for (nat j=0; j<(c2-c1); j++, dd += cp, d = dd)
00158     for (nat i=0; i<(r2-r1); i++, d += rp)
00159       clear_op::set_op (*d);
00160 }
00161
00162 TMPL static void
00163 set_range (C* d, const C& s, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
00164   d += index (r1, c1, r, c);
00165   C* dd= d;
00166   nat rp= index (1, 0, r, c), cp= index (0, 1, r, c);
00167   for (nat j=0; j<(c2-c1); j++, dd += cp, d = dd)
00168     for (nat i=0; i<(r2-r1); i++, d += rp)
00169       if (i == j) *d = s;
00170       else clear_op::set_op (*d);
00171 }
00172
00173 TMPL static void
00174 copy (C* dest, const C* s, nat r, nat c) {
00175   Vec::copy (dest, s, r * c); }
00176
00177 TMPL static void
00178 transpose (C* dest, const C* src, nat rows, nat cols) {
00179   mat_unary_stride<id_op>
00180     (dest, index (0, 1, cols, rows), index (1, 0, cols, rows),
00181      src,  index (1, 0, rows, cols), index (0, 1, rows, cols),
00182      rows, cols); }
00183
00184 TMPL static inline void
00185 print (const port& out, const C* s, nat rows, nat cols) {
00186   out << "[ ";
00187   for (nat i=0; i < rows; i++) {
00188     for (nat j=0; j < cols; j++) {
00189       out << * (s + index (i, j, rows, cols));
00190       if (j + 1 != cols) out << ", ";
00191     }
00192     if (i + 1 != rows) out << ";" << lf;
00193   }
00194   out << " ]";
00195 }
00196
00197 }; // implementation<matrix_vectorial,V,matrix_naive>
00198
00199 /******************************************************************************
00200 * Operations on rows and columns
00201 ******************************************************************************/
00202
00203 struct matrix_linear {};
00204
00205 template<typename V>
00206 struct implementation<matrix_linear,V,matrix_naive>:
00207   public implementation<matrix_vectorial,V>
00208 {
00209   typedef implementation<matrix_vectorial,V> Mat;
00210   typedef implementation<vector_linear,V> Vec;
00211
00212 template<typename Op, typename C, typename X> static inline void
00213 col_unary_scalar (C* dest, const X& x, nat c1, nat r, nat rr, nat cc) {
00214   Vec::template vec_unary_scalar_stride<Op>
00215     (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc), x, r);
00216 }
00217
00218 template<typename Op, typename C, typename X> static inline void
00219 col_binary_scalar (C* dest, const X& x, nat c1, nat c2, nat r,
00220                    nat rr, nat cc) {
00221   Vec::template vec_binary_scalar_stride<Op>
00222     (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc),
00223      dest + Mat::index (0, c2, rr, cc), Mat::index (1, 0, rr, cc), x, r);
00224 }
00225
00226 template<typename Op, typename C> static inline void
00227 col_binary_combine (C* dest, nat c1, nat c2, nat r, nat rr, nat cc) {
00228   Vec::template vec_binary_combine_stride<Op>
00229     (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc),
00230      dest + Mat::index (0, c2, rr, cc), Mat::index (1, 0, rr, cc), r);
00231 }
00232
00233 template<typename Op, typename C, typename X> static inline void
00234 row_unary_scalar (C* dest, const X& x, nat r1, nat c, nat rr, nat cc) {
00235   Vec::template vec_unary_scalar_stride<Op>
00236     (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc), x, c);
00237 }
00238
00239 template<typename Op, typename C, typename X> static inline void
00240 row_binary_scalar (C* dest, const X& x, nat r1, nat r2, nat c,
00241                    nat rr, nat cc) {
00242   Vec::template vec_binary_scalar_stride<Op>
00243     (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc),
00244      dest + Mat::index (r2, 0, rr, cc), Mat::index (0, 1, rr, cc), x, c);
00245 }
00246
00247 template<typename Op, typename C> static inline void
00248 row_binary_combine (C* dest, nat r1, nat r2, nat c, nat rr, nat cc) {
00249   Vec::template vec_binary_combine_stride<Op>
00250     (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc),
00251      dest + Mat::index (r2, 0, rr, cc), Mat::index (0, 1, rr, cc), c);
00252 }
00253
00254 TMPL static void col_mul (C* s, const C& sc, nat i, nat r, nat c) {
00255   ASSERT (i<c, "out of range");
00256   col_unary_scalar<rmul_op> (s, sc, i, r, r, c); }
00257 TMPL static void col_div (C* s, const C& sc, nat i, nat r, nat c) {
00258   ASSERT (i<c, "out of range");
00259   col_unary_scalar<rdiv_op> (s, sc, i, r, r, c); }
00260 TMPL static void row_mul (C* s, const C& sc, nat i, nat r, nat c) {
00261   ASSERT (i<r, "out of range");
00262   row_unary_scalar<rmul_op> (s, sc, i, c, r, c); }
00263 TMPL static void row_div (C* s, const C& sc, nat i, nat r, nat c) {
00264   ASSERT (i<r, "out of range");
00265   row_unary_scalar<rdiv_op> (s, sc, i, c, r, c); }
00266
00267 TMPL static void col_sweep (C* s, nat i, nat j, const C& sc, nat r, nat c) {
00268   ASSERT (i<c && j<c, "out of range");
00269   col_binary_scalar<mul_sub_op> (s, sc, i, j, r, r, c); }
00270 TMPL static void row_sweep (C* s, nat i, nat j, const C& sc, nat r, nat c) {
00271   ASSERT (i<r && j<r, "out of range");
00272   row_binary_scalar<mul_sub_op> (s, sc, i, j, c, r, c); }
00273 TMPL static void col_sweep (C* s, nat i, nat j, nat r, const C& sc,
00274                             nat rr, nat cc) {
00275   col_binary_scalar<mul_sub_op> (s, sc, i, j, r, rr, cc); }
00276 TMPL static void row_sweep (C* s, nat i, nat j, nat c, const C& sc,
00277                             nat rr, nat cc) {
00278   row_binary_scalar<mul_sub_op> (s, sc, i, j, c, rr, cc); }
00279
00280 TMPL static void col_swap (C* s, nat i, nat j, nat r, nat c) {
00281   ASSERT (i<c && j<c, "out of range");
00282   col_binary_combine<swap_op> (s, i, j, r, r, c); }
00283 TMPL static void row_swap (C* s, nat i, nat j, nat r, nat c) {
00284   ASSERT (i<r && j<r, "out of range");
00285   row_binary_combine<swap_op> (s, i, j, c, r, c); }
00286 TMPL static void col_swap (C* s, nat i, nat j, nat r, nat rr, nat cc) {
00287   col_binary_combine<swap_op> (s, i, j, r, rr, cc); }
00288 TMPL static void row_swap (C* s, nat i, nat j, nat c, nat rr, nat cc) {
00289   row_binary_combine<swap_op> (s, i, j, c, rr, cc); }
00290
00291 TMPL static void
00292 row_combine_sub (C* s, nat i, const C& si, nat j, const C& sj, nat r, nat c) {
00293   ASSERT (i<r && j<r, "out of range");
00294   C* iti= s + Mat::index (i, 0, r, c);
00295   C* itj= s + Mat::index (j, 0, r, c);
00296   nat plus= Mat::index (0, 1, r, c);
00297   while (c>0) {
00298     *iti= si * (*iti) - sj * (*itj);
00299     iti += plus; itj += plus; c--; } }
00300
00301 TMPL static void
00302 col_combine_sub (C* s, nat i, const C& si, nat j, const C& sj, nat r, nat c) {
00303   ASSERT (i<c && j<c, "out of range");
00304   C* iti= s + Mat::index (0, i, r, c);
00305   C* itj= s + Mat::index (0, j, r, c);
00306   nat plus= Mat::index (1, 0, r, c);
00307   while (r>0) {
00308     *iti= si * (*iti) - sj * (*itj);
00309     iti += plus; itj += plus; r--; } }
00310
00311 TMPL static bool
00312 row_is_zero (const C* s, nat i, nat r, nat c) {
00313   return Vec::template vec_binary_test_scalar_stride<equal_op>
00314     (s + Mat::index (i, 0, r, c), Mat::index (0, 1, r, c), C (0), c); }
00315
00316 TMPL static bool
00317 col_is_zero (const C* s, nat j, nat r, nat c) {
00318   return Vec::template vec_binary_test_scalar_stride<equal_op>
00319     (s + Mat::index (0, j, r, c), Mat::index (1, 0, r, c), C (0), r); }
00320
00321 }; // implementation<matrix_linear,V,matrix_naive>
00322
00323 /******************************************************************************
00324 * Multiplication
00325 ******************************************************************************/
00326
00327 template<typename V>
00328 struct matrix_multiply_threshold {};
00329
00330 struct matrix_multiply_base {};
00331 struct matrix_multiply {};
00332
00333 template<typename V>
00334 struct implementation<matrix_multiply_base,V,matrix_naive>:
00335   public implementation<matrix_linear,V>
00336 {
00337   typedef implementation<matrix_linear,V> Mat;
00338
00339 template<typename Op, typename C>
00340 struct clear_helper {
00341   static inline void op (C* d, nat r, nat rr, nat c, nat cc) {
00342     clear_range (d, 0, 0, r, c, rr, cc); }
00343 };
00344
00345 template<typename C>
00346 struct clear_helper<mul_add_op,C> {
00347   static inline void op (C* d, nat r, nat rr, nat c, nat cc) {}
00348 };
00349
00350 template<typename Op, typename C> static inline void
00351 clr (C* d, nat r, nat rr, nat c, nat cc) {
00352   clear_helper<Op,C>::op (d, r, rr, c, cc);
00353 }
00354
00355 template<typename Op, typename D, typename S1, typename S2>
00356 static inline void
00357 mul (D* d, const S1* s1, const S2* s2,
00358      nat r, nat rr, nat l, nat ll, nat c, nat cc) {
00359   typedef typename Op::acc_op Acc;
00360   if (l == 0) clr<Op> (d, r, rr, c, cc);
00361   else {
00362     nat ri, ci, li;
00363     D *dr, *dc;
00364     const S1 *s1r, *s1c;
00365     const S2 *s2r, *s2c;
00366     nat drp = Mat::index (1, 0, rr, cc), dcp = Mat::index (0, 1, rr, cc);
00367     nat s1rp= Mat::index (1, 0, rr, ll), s1cp= Mat::index (0, 1, rr, ll);
00368     nat s2rp= Mat::index (1, 0, ll, cc), s2cp= Mat::index (0, 1, ll, cc);
00369     for (ri= r, dr= d, s1r= s1; ri!=0; ri--, dr += drp, s1r += s1rp)
00370       for (ci= c, dc= dr, s2c= s2; ci!=0; ci--, dc += dcp, s2c += s2cp) {
00371         D tmp= *dc;
00372         for (li= l, s1c= s1r, s2r= s2c; li==l; li--, s1c += s1cp, s2r += s2rp)
00373           Op ::set_op (tmp, *s1c, *s2r);
00374         for (                         ; li!=0; li--, s1c += s1cp, s2r += s2rp)
00375           Acc::set_op (tmp, *s1c, *s2r);
00376         *dc= tmp;
00377       }
00378   }
00379 }
00380
00381 }; // implementation<matrix_multiply_base,V,matrix_naive>
00382
00383 template<typename V>
00384 struct implementation<matrix_multiply,V,matrix_naive>:
00385   public implementation<matrix_multiply_base,V>
00386 {
00387   typedef implementation<matrix_multiply_base,V> Mat;
00388
00389 template<typename Op, typename D, typename S1, typename S2>
00390 static inline void
00391 mul (D* d, const S1* s1, const S2* s2,
00392      nat r, nat rr, nat l, nat ll, nat c, nat cc) {
00393   Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00394 }
00395
00396 template<typename D, typename S1, typename S2> static inline void
00397 mul (D* dest, const S1* m1, const S2* m2, nat r, nat l, nat c) {
00398   Mat::template mul<mul_op> (dest, m1, m2, r, r, l, l, c, c);
00399 }
00400
00401 }; // implementation<matrix_multiply,V,matrix_naive>
00402
00403 /******************************************************************************
00404 * Matrix product iteration
00405 ******************************************************************************/
00406
00407 struct matrix_iterate {};
00408
00409 template<typename V>
00410 struct implementation<matrix_iterate,V,matrix_naive>:
00411   public implementation<matrix_multiply,V>
00412 {
00413   typedef implementation<matrix_multiply,V> Mat;
00414
00415 TMPL static inline void
00416 iterate_mul (C** d, const C* s, const C* x,
00417              nat rs, nat cs, nat rx, nat cx, nat n) {
00418   // return s^0 x, s^1 x,..., s^{n-1} x in d
00419   // d must have allocated size n * rs * cx
00420   VERIFY (cs == rx && rs == cs, "sizes do not match");
00421   if (rs == 0) return;
00422   Mat::get_range (d[0], x, 0, 0, rx, cx, rx, cx);
00423   for (nat i = 1; i < n; i++)
00424     Mat::mul (d[i], s, d[i-1], rs, cs, cx);
00425 }
00426
00427 TMPL static inline void
00428 project_iterate_mul (C** d, const C* y, const C* s, const C* x,
00429                      nat ry, nat cy, nat rs, nat cs, nat rx, nat cx, nat n) {
00430   // return y s^0 x, y s^1 x,..., y s^{n-1} x in d
00431   VERIFY (cy == rs && rs == cs && cs == rx, "sizes do not match");
00432   if (n == 0) return;
00433   nat l= aligned_size<C,V> (rx * cx);
00434   C* buf= mmx_new<C> (l << 1);
00435   C* t1= buf, * t2= t1 + l;
00436   Mat::get_range (t1, x, 0, 0, rx, cx, rx, cx);
00437   Mat::mul (d[0], y, t1, ry, cy, cx);
00438   for (nat i = 1; i < n; i++) {
00439     Mat::mul (t2, s, t1, rs, cs, cx);
00440     Mat::mul (d[i], y, t2, ry, cy, cx);
00441     swap (t1, t2);
00442   }
00443   mmx_delete<C> (buf, l << 1);
00444 }
00445
00446 }; // implementation<matrix_iterate,V,matrix_naive>
00447
00448 /******************************************************************************
00449 * Low level action of permutations on matrices
00450 ******************************************************************************/
00451
00452 struct matrix_permute {};
00453
00454 template<typename V>
00455 struct implementation<matrix_permute,V,matrix_naive>:
00456   public implementation<matrix_multiply,V>
00457 {
00458   typedef implementation<matrix_multiply,V> Mat;
00459   typedef implementation<vector_linear,V> Vec;
00460
00461   TMPL static void
00462   col_permute (C* dest, const C* m, const nat* p, nat r, nat c) {
00463     // Permute columns of r x c matrix according to length c permutation p
00464     nat rs= Mat::index (1, 0, r, c);
00465     for (nat i=0; i<c; i++)
00466       Vec::template vec_unary_stride<id_op>
00467         (dest + Mat::index (0, i, r, c), rs,
00468          m + Mat::index (0, p[i], r, c), rs, r);
00469   }
00470
00471   TMPL static void
00472   row_permute (C* dest, const C* m, const nat* p, nat r, nat c) {
00473     // Permute rows of r x c matrix according to length c permutation p
00474     nat cs= Mat::index (0, 1, r, c);
00475     for (nat i=0; i<r; i++)
00476       Vec::template vec_unary_stride<id_op>
00477         (dest + Mat::index (i, 0, r, c), cs,
00478          m + Mat::index (p[i], 0, r, c), cs, c);
00479   }
00480
00481   TMPL static void
00482   col_permute (C* m, const nat* p, nat r, nat c, bool inv= false) {
00483     // Permute columns of r x c matrix according to length c permutation p
00484     // Apply the inverse permutation if inv is true
00485     nat q[c];
00486     if (inv) for (nat i=0; i<c; i++) q[i]= p[i];
00487     else for (nat i=0; i<c; i++) q[p[i]]= i;
00488     for (nat i=0; i<c; ) {
00489       if (q[i] == i) i++;
00490       else {
00491         nat j= q[i], k= q[j];
00492         Mat::col_swap (m, i, j, r, c);
00493         q[i]= k; q[j]= j;
00494       }
00495     }
00496   }
00497
00498   TMPL static void
00499   row_permute (C* m, const nat* p, nat r, nat c, bool inv= false) {
00500     // Permute rows of r x c matrix according to length c permutation p
00501     // Apply the inverse permutation if inv is true
00502     nat q[r];
00503     if (inv) for (nat i=0; i<c; i++) q[i]= p[i];
00504     else for (nat i=0; i<c; i++) q[p[i]]= i;
00505     for (nat i=0; i<r; ) {
00506       if (q[i] == i) i++;
00507       else {
00508         nat j= q[i], k= q[j];
00509         Mat::row_swap (m, i, q[i], r, c);
00510         q[i]= k; q[j]= j;
00511       }
00512     }
00513   }
00514
00515 }; // implementation<matrix_permute,V,matrix_naive>
00516
00517 /******************************************************************************
00518 * Helper for pivot quality
00519 ******************************************************************************/
00520
00521 template<typename C>
00522 struct pivot_helper {
00523   static inline bool
00524   better (const C& x1, const C& x2) { return false; }
00525 };
00526
00527 template<typename C> inline bool
00528 better_pivot (const C& x1, const C& x2) {
00529   return pivot_helper<C>::better (x1, x2);
00530 }
00531
00532 /******************************************************************************
00533 * Echelon forms
00534 ******************************************************************************/
00535
00536 struct matrix_echelon {};
00537
00538 template<typename V>
00539 struct implementation<matrix_echelon,V,matrix_naive>:
00540   public implementation<matrix_multiply,V>
00541 {
00542   typedef implementation<matrix_multiply,V> Mat;
00543
00544 private:
00545
00546 TMPL static void
00547 col_echelon (C* m, C* k, nat ri, nat ci, nat rf, nat cf, nat rm, nat cm,
00548              C& num, C& den, bool reduced, nat* permut)
00549 {
00550   // Perform column pivoting from (ri, ci) to (rf, cf).
00551   // In return m is echelonized and k is the transformation matrix,
00552   // so that the computed m is the product of m * k.
00553   // If k is NULL then it is not computed. Otherwise k must have
00554   // allocated size at least cm * cm. If the input matrix is regular,
00555   // then num/den yields the determinant. We have den=1 if the number
00556   // of row swaps is even and den=-1 otherwise. Note that the first
00557   // entries of the echelonized rows need not to be one.
00558   //
00559   // if not NULL, permut returns the permutation performed on the columns
00560   VERIFY ((ri < rm) && (ci < cm) && (rf <= rm) && (cf <= cm), "out of range");
00561   format<C> fm= get_format (m[0]);
00562   num= promote (1, fm); den= promote (1, fm);
00563   nat cb= ci;
00564   if (permut != NULL) for (nat i= 0; i < cm; i++) permut[i]= i;
00565   while (ri < rf && ci < cf) {
00566     // search pivot
00567     nat i= ri, j= ci;
00568     nat best_index;
00569     C   best_val= promote (0, fm);
00570     for (i= ri; i<rf; i++) {
00571       best_index= cf;
00572       for (j= ci; j<cf; j++) {
00573         C next_val= Mat::entry (m, i, j, rm, cm);
00574         if (next_val != promote (0, fm))
00575           if (best_index == cf || better_pivot (next_val, best_val)) {
00576             best_index= j;
00577             best_val= next_val;
00578           }
00579       }
00580       if (best_index != cf) { j= best_index; break; }
00581     }
00582
00583     if (i<rf && j<cf) {
00584       // put pivot on the left
00585       if (j != ci) {
00586         if (k != NULL) Mat::col_swap (k, ci, j, cm, cm);
00587         Mat::col_swap (m, ci, j, rm, cm);
00588         den= -den;
00589         if (permut != NULL) swap (permut[j], permut[ci]);
00590       }
00591       ri= i;
00592       // column reduction
00593       C p= Mat::entry (m, ri, ci, rm, cm);
00594       num *= p;
00595       for (nat index= reduced ? cb : ci + 1; index < cf; index++) {
00596         if (index == ci) continue;
00597         C t= Mat::entry (m, ri, index, rm, cm);
00598         if (k != NULL) Mat::col_sweep (k, index, ci, t/p, cm, cm);
00599         Mat::col_sweep (m, index, ci, t/p, rm, cm);
00600       }
00601       ri++; ci++;
00602     }
00603     else  break;
00604   }
00605 }
00606
00607 public:
00608
00609 TMPL static inline void
00610 col_echelon (C* m, C* k, nat rm, nat cm, C& num, C& den,
00611              bool reduced= false, nat* permut= NULL) {
00612   if (k != NULL && cm != 0) Mat::set (k, promote (1, k[0]), cm, cm);
00613   col_echelon (m, k, 0, 0, rm, cm, rm, cm, num, den, reduced, permut);
00614 }
00615
00616 TMPL static inline void
00617 col_echelon (C* m, C* k, nat rm, nat cm,
00618              bool reduced= false, nat* permut= NULL) {
00619   if (k != NULL && cm != 0) Mat::set (k, promote (1, k[0]), cm, cm);
00620   C num, den;
00621   col_echelon (m, k, rm, cm, num, den, reduced, permut);
00622 }
00623
00624 }; // implementation<matrix_echelon,V,matrix_naive>
00625
00626 /******************************************************************************
00627 * Determinant and characteristic polynomial with exact division
00628 ******************************************************************************/
00629
00630 struct matrix_determinant {};
00631
00632 template<typename V>
00633 struct implementation<matrix_determinant,V,matrix_naive>:
00634   public implementation<matrix_echelon,V>
00635 {
00636   typedef implementation<matrix_echelon,V> Mat;
00637
00638 TMPL static void
00639 det (C& r, const C* m, nat n) {
00640   if (n == 0) { set_as (r, 1); return; }
00641   nat len_c= aligned_size<C,V> (n * n);
00642   C* c= mmx_new<C> (len_c), * k= NULL;
00643   C num, den;
00644   Mat::copy (c, m, n, n);
00645   Mat::col_echelon (c, k, n, n, num, den);
00646   if (col_is_zero (c, n-1, n, n)) set_as (r, 0);
00647   else r= num / den;
00648   mmx_delete<C> (c, len_c);
00649 }
00650
00651 }; // implementation<matrix_determinant,V,matrix_naive>
00652
00653 /******************************************************************************
00654 * Kernel
00655 ******************************************************************************/
00656
00657 struct matrix_kernel {};
00658
00659 template<typename V>
00660 struct implementation<matrix_kernel,V,matrix_naive>:
00661   public implementation<matrix_echelon,V>
00662 {
00663   typedef implementation<matrix_echelon,V> Mat;
00664
00665 TMPL static nat
00666 kernel (C* k, const C* m, nat rm, nat cm) {
00667   // return the dimension of the kernel
00668   VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
00669   nat i, j, dim, len_c= aligned_size<C,V> (rm * cm);
00670   C* c= mmx_new<C> (len_c);
00671   Mat::copy (c, m, rm, cm);
00672   Mat::col_echelon (c, k, rm, cm);
00673   for (i=0; i<cm; i++)
00674     if (Mat::col_is_zero (c, i, rm, cm)) break;
00675   mmx_delete<C> (c, len_c);
00676   dim= cm - i;
00677   if (dim < cm)
00678     for (j=0; j < dim; j++)
00679       Mat::col_swap (k, j, i+j, cm, cm);
00680     // Note that the next columns are not filled with zeros
00681   C x;
00682   for (j=0; j < dim; j++)
00683     for (i=0; i < cm; i++)
00684       if ((x= *(k + Mat::index (i, j, cm, cm))) != 0)
00685         Mat::col_div (k, x, j, cm, cm);
00686   return dim;
00687 }
00688
00689 }; // implementation<matrix_kernel,V,matrix_naive>
00690
00691 /******************************************************************************
00692 * Image and rank
00693 ******************************************************************************/
00694
00695 struct matrix_image {};
00696
00697 template<typename V>
00698 struct implementation<matrix_image,V,matrix_naive>:
00699   public implementation<matrix_echelon,V>
00700 {
00701   typedef implementation<matrix_echelon,V> Mat;
00702
00703 TMPL static nat
00704 image (C* k, const C* m, nat rm, nat cm) {
00705   // return the dimension of the image
00706   VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
00707   nat i, len_c= aligned_size<C,V> (rm * cm);
00708   C* c= mmx_new<C> (len_c);
00709   Mat::copy (c, m, rm, cm);
00710   Mat::col_echelon (c, (C*) NULL, rm, cm);
00711   for (i=0; i<cm; i++)
00712     if (Mat::col_is_zero (c, i, rm, cm)) break;
00713   Mat::get_range (k, c, 0, 0, rm, i, rm, cm);
00714   // Note that the next columns are not filled with zeros
00715   mmx_delete<C> (c, len_c);
00716   return i;
00717 }
00718
00719 TMPL static nat
00720 rank (const C* m, nat rm, nat cm) {
00721   VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
00722   nat i, len_c= aligned_size<C,V> (rm * cm);
00723   C* c= mmx_new<C> (len_c);
00724   Mat::copy (c, m, rm, cm);
00725   Mat::col_echelon (c, (C*) NULL, rm, cm);
00726   for (i=0; i<cm; i++)
00727     if (Mat::col_is_zero (c, i, rm, cm)) break;
00728   mmx_delete<C> (c, len_c);
00729   return i;
00730 }
00731
00732 }; // implementation<matrix_image,V,matrix_naive>
00733
00734 /******************************************************************************
00735 * Inversion
00736 ******************************************************************************/
00737
00738 struct matrix_invert {};
00739
00740 template<typename V>
00741 struct implementation<matrix_invert,V,matrix_naive>:
00742   public implementation<matrix_echelon,V>
00743 {
00744   typedef implementation<matrix_echelon,V> Mat;
00745
00746 TMPL static void
00747 invert_lower_triangular (C* inv, const C* m, nat n) {
00748   for (nat i=0; i<n; i++) {
00749     C d= Mat::entry (m, i, i, n, n);
00750     ASSERT (d != 0, "non-invertible matrix");
00751     for (nat k=0; k<=i; k++) {
00752       C sum= promote (0, d);
00753       for (nat j=k; j<i; j++)
00754         sum -= Mat::entry (m, i, j, n, n) * Mat::entry (inv, j, k, n, n);
00755       if (i == k) sum += promote (1, d);
00756       inv[Mat::index (i, k, n, n)]= sum / d;
00757     }
00758     for (nat k=i+1; k<n; k++)
00759       inv[Mat::index (i, k, n, n)]= promote (0, d);
00760   }
00761 }
00762
00763 TMPL static void
00764 invert (C* k, const C* m, nat n) {
00765   nat l= aligned_size<C,V> (n * n);
00766   C* a= mmx_new<C> (l << 1);
00767   C* b= a + l;
00768   Mat::copy (k, m, n, n);
00769   Mat::col_echelon (k, b, n, n); // m * b = k, k lower triangular
00770   //mmout << "K\n----------\n"; Mat::print (mmout, k, n, n); mmout << "\n";
00771   //mmout << "B\n----------\n"; Mat::print (mmout, b, n, n); mmout << "\n";
00772   invert_lower_triangular (a, k, n);
00773   //mmout << "A\n----------\n"; Mat::print (mmout, a, n, n); mmout << "\n";
00774   Mat::mul (k, b, a, n, n, n);
00775   mmx_delete<C> (a, l << 1);
00776 }
00777
00778 }; // implementation<matrix_invert,V,matrix_naive>
00779
00780 /******************************************************************************
00781 * Gram Schmidt orthogonalization and related decompositions
00782 ******************************************************************************/
00783
00784 struct matrix_orthogonalization {};
00785
00786 template<typename V>
00787 struct implementation<matrix_orthogonalization,V,matrix_naive>:
00788   public implementation<matrix_multiply,V>
00789 {
00790   typedef implementation<matrix_linear,V> Mat;
00791   typedef implementation<vector_linear,V> Vec;
00792
00793 TMPL static inline C
00794 row_inn_prod (C* m, nat i, nat j, nat r, nat c) {
00795   // dot product of rows i and j
00796   return Vec::template vec_binary_big_stride<mul_add_op> (
00797     m + Mat::index (i, 0, r, c), Mat::index (0, 1, r, c),
00798     m + Mat::index (j, 0, r, c), Mat::index (0, 1, r, c), c);
00799 }
00800
00801 TMPL static inline C
00802 col_inn_prod (C* m, nat i, nat j, nat r, nat c) {
00803   // dot product of columns i and j
00804   return Vec::template vec_binary_big_stride<mul_add_op> (
00805     m + Mat::index (0, i, r, c), Mat::index (1, 0, r, c),
00806     m + Mat::index (0, j, r, c), Mat::index (1, 0, r, c), r);
00807 }
00808
00809 TMPL static void
00810 row_orthogonalize (C* m, nat r, nat c, C* n) {
00811 // In place Gram Schmidt orthogonalization of the row vectors of
00812 // m. The vector n contains the squares of the norms of the row
00813 // vectors of the computed m, ie n[i]= ||m[i,.]||^2.
00814   for (nat i= 0; i < r; i++) {
00815     for (nat j= 0; j < i; j++) {
00816       if (n[j] == 0) continue;
00817       C mu= row_inn_prod (m, i, j, r, c) / n[j];
00818       Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
00819     }
00820     n[i]= row_inn_prod (m, i, i, r, c);
00821   }
00822 }
00823
00824 TMPL static void
00825 col_orthogonalize (C* m, nat r, nat c, C* n) {
00826 // Idem for columns
00827   for (nat i= 0; i < c; i++) {
00828     for (nat j= 0; j < i; j++) {
00829       if (n[j] == 0) continue;
00830       C mu= col_inn_prod (m, i, j, r, c) / n[j];
00831       Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
00832     }
00833     n[i]= col_inn_prod (m, i, i, r, c);
00834   }
00835 }
00836
00837 TMPL static void
00838 row_orthogonalize (C* m, nat r, nat c, C* l, C* n) {
00839 // In place Gram Schmidt orthogonalization of the column vectors of
00840 // m. The matrix l is lower triangular. In return m * l equals the
00841 // input matrix m. The vector n contains the squares of the norms of
00842 // the column vectors of the computed m, ie n[i]= ||m[i,.]||^2.
00843   for (nat i= 0; i < r; i++) {
00844     for (nat j= 0; j < i; j++) {
00845       if (n[j] == 0) continue;
00846       C mu= row_inn_prod (m, i, j, r, c) / n[j];
00847       Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
00848       *(l + Mat::index (i, j, r, r))= mu;
00849     }
00850     n[i]= row_inn_prod (m, i, i, r, c);
00851     *(l + Mat::index (i, i, r, r))= C(1);
00852   }
00853 }
00854
00855 TMPL static void
00856 col_orthogonalize (C* m, nat r, nat c, C* l, C* n) {
00857 // Idem for columns.
00858   for (nat i= 0; i < c; i++) {
00859     for (nat j= 0; j < i; j++) {
00860       if (n[j] == 0) continue;
00861       C mu= col_inn_prod (m, i, j, r, c) / n[j];
00862       Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
00863       *(l + Mat::index (j, i, c, c))= mu;
00864     }
00865     n[i]= col_inn_prod (m, i, i, r, c);
00866     *(l + Mat::index (i, i, c, c))= C(1);
00867   }
00868 }
00869
00870 TMPL static void
00871 row_orthonormalize (C* m, nat r, nat c) {
00872 // In place Gram Schmidt orthonormalization of the row vectors of m.
00873   for (nat i= 0; i < r; i++) {
00874     for (nat j= 0; j < i; j++)
00875       Mat::row_combine_sub (m, i, C(1), j, row_inn_prod (m, i, j, r, c), r, c);
00876     C a= row_inn_prod (m, i, i, r, c);
00877     if (a != 0) Mat::row_div (m, sqrt (a), i, r, c);
00878   }
00879 }
00880
00881 TMPL static void
00882 col_orthonormalize (C* m, nat r, nat c) {
00883 // Idem for columns
00884   for (nat i= 0; i < c; i++) {
00885     for (nat j= 0; j < i; j++)
00886       Mat::col_combine_sub (m, i, C(1), j, col_inn_prod (m, i, j, r, c), r, c);
00887     C a= col_inn_prod (m, i, i, r, c);
00888     if (a != 0) Mat::col_div (m, sqrt (a), i, r, c);
00889   }
00890 }
00891
00892 TMPL static void
00893 row_orthonormalize (C* m, nat r, nat c, C* l) {
00894 // In place Gram Schmidt orthonormalization of the row vectors of m.
00895 // The matrix l is lower triangular. In return m * l equals the
00896 // input matrix m.
00897   for (nat i= 0; i < r; i++) {
00898     for (nat j= 0; j < i; j++) {
00899       C mu= row_inn_prod (m, i, j, r, c);
00900       Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
00901       *(l + Mat::index (i, j, r, r))= mu;
00902     }
00903     C a= row_inn_prod (m, i, i, r, c);
00904     if (a != 0) {
00905       C b= sqrt (a);
00906       Mat::row_div (m, b, i, r, c);
00907       *(l + Mat::index (i, i, r, r))= b;
00908     }
00909   }
00910 }
00911
00912 TMPL static void
00913 col_orthonormalize (C* m, nat r, nat c, C* l) {
00914 // Idem for columns.
00915   for (nat i= 0; i < c; i++) {
00916     for (nat j= 0; j < i; j++) {
00917       C mu= col_inn_prod (m, i, j, r, c);
00918       Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
00919       *(l + Mat::index (j, i, c, c))= mu;
00920     }
00921     C a= col_inn_prod (m, i, i, r, c);
00922     if (a != 0) {
00923       C b= sqrt (a);
00924       Mat::col_div (m, b, i, r, c);
00925       *(l + Mat::index (i, i, c, c))= b;
00926     }
00927   }
00928 }
00929
00930 }; // implementation<matrix_orthogonalization,V,matrix_naive>
00931
00932 #undef TMPL
00933 }//namespace mmx
00934 #endif //__MMX__MATRIX_NAIVE__HPP