numerix_doc 0.4
/Users/mourrain/Devel/mmx/numerix/include/numerix/sse.hpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : sse.hpp
00004 * DESCRIPTION: Wrapper for SSE instructions
00005 * COPYRIGHT  : (C) 2008  Joris van der Hoeven and Gregoire Lecerf
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012 
00013 #ifndef __MMX_SSE_HPP
00014 #define __MMX_SSE_HPP
00015 #include <numerix/simd.hpp>
00016 
00017 #if defined (NUMERIX_ENABLE_SIMD) && defined (__SSE2__)
00018 #include <stdint.h>
00019 #ifdef __SSE2__
00020 #include <emmintrin.h>
00021 #endif
00022 #ifdef __SSE3__
00023 #include <pmmintrin.h>
00024 #endif
00025 #ifdef __SSSE3__
00026 #include <tmmintrin.h>
00027 #endif
00028 #ifdef __SSE4A__
00029 #include <ammintrin.h>
00030 #endif
00031 
00032 #include <basix/compound.hpp>
00033 #include <basix/identifiers.hpp>
00034 #include <basix/syntactic.hpp>
00035 #include <numerix/complex.hpp>
00036 
00037 namespace mmx {
00038 
00039 /******************************************************************************
00040 * Generic vectorial routines
00041 ******************************************************************************/
00042   
00043 template<typename C> inline typename Simd_type (C)
00044 simd_load_aligned (const C* v) {
00045   return (typename Simd_type (C))
00046     _mm_load_si128 ((const __m128i*) v); }
00047 
00048 template<typename C> inline void
00049 simd_save_aligned (C* v, const typename Simd_type (C)& x) {
00050   _mm_store_si128 ((__m128i*) v, (const __m128i) x); }
00051 
00052 template<typename C> inline void
00053 simd_save (C* v, const typename Simd_type(C)& x) {
00054   _mm_storeu_si128 ((__m128i*) v, (const __m128i) x); }
00055 
00056 template<typename C> inline typename Simd_type (C)
00057 simd_load (const C* v0, const C* v1) {
00058   return simd_set (*v0, *v1); }
00059 
00060 template<typename C> inline void
00061 simd_save (C* v0, C* v1, const typename Simd_type (C)& x) {
00062   static C v[Simd_size (C)]; // ensures alignment
00063   simd_save_aligned (v, x);
00064   *v0 = v[0]; *v1 = v[1]; }
00065 
00066 template<typename C> inline typename Simd_type (C)
00067 simd_load (const C* v0, const C* v1, const C* v2, const C* v3) {
00068   return simd_set (*v0, *v1, *v2, *v3); }
00069 
00070 template<typename C> inline void
00071 simd_save (C* v0, C* v1, C* v2, C* v3, const typename Simd_type (C)& x) {
00072   static C v[Simd_size (C)]; // ensures alignment
00073   simd_save_aligned (v, x);
00074   *v0 = v[0]; *v1 = v[1]; *v2 = v[2]; *v3 = v[3]; }
00075 
00076 template<typename C> inline typename Simd_type (C)
00077 simd_load (const C* v0, const C* v1, const C* v2, const C* v3,
00078            const C* v4, const C* v5, const C* v6, const C* v7) {
00079   return simd_set (*v0, *v1, *v2, *v3, *v4, *v5, *v6, *v7); }
00080 
00081 template<typename C> inline void
00082 simd_save (C* v0, C* v1, C* v2, C* v3, C* v4, C* v5, C* v6, C* v7,
00083            const typename Simd_type (C)& x) {
00084   static C v[Simd_size (C)]; // ensures alignment
00085   simd_save_aligned (v, x);
00086   *v0 = v[0]; *v1 = v[1]; *v2 = v[2]; *v3 = v[3];
00087   *v4 = v[4]; *v5 = v[5]; *v6 = v[6]; *v7 = v[7]; }
00088 
00089 template<typename C> inline typename Simd_type (C)
00090 simd_load (const C* v0, const C* v1, const C* v2, const C* v3,
00091            const C* v4, const C* v5, const C* v6, const C* v7,
00092            const C* v8, const C* v9, const C* v10, const C* v11,
00093            const C* v12, const C* v13, const C* v14, const C* v15) {
00094   return simd_set (*v0, *v1, *v2, *v3, *v4, *v5, *v6, *v7,
00095                    *v8, *v9, *v10, *v11, *v12, *v13, *v14, *v15); }
00096 
00097 template<typename C> inline void
00098 simd_save (C* v0, C* v1, C* v2, C* v3, C* v4, C* v5, C* v6, C* v7,
00099            C* v8, C* v9, C* v10, C* v11, C* v12, C* v13, C* v14, C* v15,
00100            const typename Simd_type (C)& x) {
00101   static C v[Simd_size (C)]; // ensures alignment
00102   simd_save_aligned (v, x);
00103   *v0 = v[0]; *v1 = v[1]; *v2 = v[2]; *v3 = v[3];
00104   *v4 = v[4]; *v5 = v[5]; *v6 = v[6]; *v7 = v[7];
00105   *v8 = v[8]; *v9 = v[9]; *v10 = v[10]; *v11 = v[11];
00106   *v12 = v[12]; *v13 = v[13]; *v14 = v[14]; *v15 = v[15]; }
00107 
00108 template<typename C> inline C
00109 simd_big_add (const typename Simd_type (C)& x) {
00110   C r = 0;
00111   for (nat i = 0; i < Simd_size (C); i++)
00112     r += ((C*) &x) [i];
00113   return r; }
00114  
00115 /******************************************************************************
00116 * Vector light interface
00117 ******************************************************************************/
00118 
00119 // Note that vectorial +, -, *, /, unary minus, ^, |, &, ~ operations
00120 // are supposed to be furnished by the compiler
00121 
00122 template<typename V> inline syntactic
00123 simd_flatten (const V& x) {
00124   typedef typename Simd_base_type(V) C;
00125   static const nat size = Simd_size(C);
00126   C* v = mmx_new<C> (size);
00127   simd_save_aligned (v, x);
00128   vector<syntactic> w = fill <syntactic> (size);;
00129   for (nat i = 0; i < size; i++)
00130     w[i] = flatten (v[i]);
00131   mmx_delete<C> (v, size);
00132   return apply (GEN_SQTUPLE, w); 
00133 }
00134 
// SIMD_SUGAR(C,V) generates the light-vector helpers for the vector type V
// whose lanes have scalar type C:
//   flatten (x)        syntactic form of x, via simd_flatten
//   equal (x, y)       true iff x and y coincide on all 128 bits; the test
//                      is bitwise, so for floating point lanes +0.0 and -0.0
//                      differ and identical NaN bit patterns compare equal
//   unequal (x, y)     negation of equal
//   clear (x)          set all lanes to C (0)
//   mul (x, y1, y2)    x = y1 * y2, the scalar y2 being broadcast
//   mul_add (x, y1, y2)  x += y1 * y2, the scalar y2 being broadcast
// clear, mul and mul_add are prefixed by STMPL (defined elsewhere;
// presumably `template<>`, i.e. specializations of generic templates).
// _mm_movemask_epi8 returns one bit per byte, i.e. a 16-bit mask, so
// "all lanes equal" corresponds to the value 0xFFFF.
#define SIMD_SUGAR(C,V)                                                 \
  inline syntactic flatten (const V& x) {                               \
    return simd_flatten (x); }                                          \
  inline bool equal (const V& x, const V& y) {                          \
    return _mm_movemask_epi8 (                                          \
      (__m128i) _mm_cmpeq_epi32 ((const __m128i) x,                     \
                                 (const __m128i) y)) == 0xFFFF; }       \
  inline bool unequal (const V& x, const V& y) {                        \
    return ! equal (x, y); }                                            \
  STMPL inline void clear (V& x) {                                      \
    x = simd_set_duplicate (C (0)); }                                   \
  STMPL inline void mul (V& x, const V& y1, const C& y2) {              \
    x = y1 * simd_set_duplicate (y2); }                                 \
  STMPL inline void mul_add (V& x, const V& y1, const C& y2) {          \
    x += y1 * simd_set_duplicate (y2); }
00150 
00151 /******************************************************************************
00152 * Vectors of two doubles
00153 ******************************************************************************/
00154 
00155 typedef double __attribute__((vector_size(16))) sse_double;
00156 
00157 template<>
00158 struct simd_helper<double> {
00159   typedef sse_double type;
00160   static const nat size = 2; };
00161 
00162 template<>
00163 struct simd_base_helper<sse_double> {
00164   typedef double type; };
00165 
00166 inline sse_double
00167 simd_load (const double* v) {
00168   return _mm_loadu_pd (v); }
00169 
00170 inline sse_double
00171 simd_set_duplicate (double x) {
00172   return _mm_set1_pd (x); }
00173 
00174 inline sse_double
00175 simd_set (double v0, double v1) {
00176   return _mm_set_pd (v1, v0); }
00177 
00178 #ifdef __SSE3__
00179 STMPL inline double
00180 simd_big_add (const sse_double& x) {
00181   double r;
00182   sse_double y = _mm_hadd_pd (x, simd_set_duplicate((double) 0));
00183   _mm_storel_pd (&r, y);
00184   return r;
00185 }
00186 #endif
00187 
00188 // Comparisons
00189 inline sse_double
00190 simd_equal (const sse_double& x, const sse_double& y) {
00191   return _mm_cmpeq_pd (x, y); }
00192   
00193 inline sse_double
00194 simd_unequal (const sse_double& x, const sse_double& y) {
00195   return _mm_cmpneq_pd (x, y); }
00196 
00197 inline sse_double
00198 simd_less (const sse_double& x, const sse_double& y) {
00199   return _mm_cmplt_pd (x, y); }
00200 
00201 inline sse_double
00202 simd_gtr (const sse_double& x, const sse_double& y) {
00203   return _mm_cmpgt_pd (x, y); }
00204 
00205 inline sse_double
00206 simd_lesseq (const sse_double& x, const sse_double& y) {
00207   return _mm_cmple_pd (x, y); }
00208 
00209 inline sse_double
00210 simd_gtreq (const sse_double& x, const sse_double& y) {
00211   return _mm_cmpge_pd (x, y); }
00212 
00213 // Min, max
00214 inline sse_double
00215 min (const sse_double& x, const sse_double& y) { 
00216   return _mm_min_pd (x, y); }
00217 
00218 inline sse_double
00219 max (const sse_double& x, const sse_double& y) {
00220   return _mm_max_pd (x, y); }
00221 
00222 inline sse_double
00223 simd_shuffle (const sse_double& x, const sse_double& y, int i) {
00224   return _mm_shuffle_pd (x, y, i); }
00225 
00226 // Specific
00227 inline sse_double
00228 simd_load_duplicate (const double* v) {
00229   return _mm_load1_pd (v); }
00230 
00231 inline sse_double
00232 simd_load (const double* v0, const double* v1) {
00233   return _mm_loadh_pd (_mm_load1_pd (v0), v1); }
00234 
00235 inline void
00236 simd_save (double* v0, double* v1, const sse_double& x) {
00237   _mm_storel_pd (v0, x); _mm_storeh_pd (v1, x); }
00238 
00239 inline sse_double
00240 simd_swap (const sse_double& x) {
00241   return _mm_shuffle_pd (x, x, 1); }
00242 
00243 // Printing and equalities
00244 SIMD_SUGAR (double, sse_double)
00245 
00246 /******************************************************************************
00247 * Vectors of two complexified doubles
00248 ******************************************************************************/
00249 
00250 typedef complex<    double>      complex_double;
00251 typedef complex<sse_double>  sse_complex_double;
00252 
00253 template<>
00254 struct simd_helper<complex_double> {
00255   typedef sse_complex_double type;
00256   static const nat size = 2; };
00257 
00258 inline sse_complex_double
00259 simd_set_duplicate (const complex_double& z) {
00260   return sse_complex_double (simd_set_duplicate (Re (z)),
00261                              simd_set_duplicate (Im (z))); }
00262 inline sse_complex_double
00263 simd_set (const complex_double& z0, const complex_double& z1) {
00264   return sse_complex_double (simd_set (Re (z0), Re (z1)),
00265                              simd_set (Im (z0), Im (z1))); }
00266 inline sse_complex_double
00267 simd_load_duplicate (const complex_double* v) {
00268   const double* w= (double*) ((void*) v);
00269   return sse_complex_double (simd_load_duplicate (w),
00270                              simd_load_duplicate (w + 1)); }
00271 
00272 template<> inline syntactic
00273 flatten (const sse_complex_double& z) {
00274   return flatten (Re (z)) + flatten (Im (z)) * Imaginary (syntactic); }
00275 
00276 /******************************************************************************
00277 * Vectors of two int64_t
00278 ******************************************************************************/
00279 
00280 typedef int64_t __attribute__((vector_size(16))) sse_int64_t;
00281 
00282 template<>
00283 struct simd_helper<int64_t> {
00284   typedef sse_int64_t type;
00285   static const nat size = 2; };
00286 
00287 template<>
00288 struct simd_base_helper<sse_int64_t> {
00289   typedef int64_t type; };
00290 
00291 inline sse_int64_t
00292 simd_set_duplicate (int64_t x) {
00293   return sse_int64_t(_mm_set1_epi64 ((__m64) x)); }
00294 
00295 inline sse_int64_t
00296 simd_set (int64_t x0, int64_t x1) {
00297   return sse_int64_t(_mm_set_epi64 ((__m64) x1, (__m64) x0)); }
00298 
00299 // Shifts
00300 inline sse_int64_t
00301 simd_sll (const sse_int64_t& x, int i) {
00302   return sse_int64_t(_mm_slli_epi64 ((__m128i) x, i)); }
00303 
00304 inline sse_int64_t
00305 simd_srl (const sse_int64_t& x, int i) {
00306   return sse_int64_t(_mm_srli_epi64 ((__m128i) x, i)); }
00307 
00308 // Printing and equalities
00309 SIMD_SUGAR (int64_t, sse_int64_t)
00310 
00311 /******************************************************************************
00312 * Vectors of two uint64_t
00313 ******************************************************************************/
00314 
00315 typedef uint64_t __attribute__((vector_size(16))) sse_uint64_t;
00316 
00317 template<>
00318 struct simd_helper<uint64_t> {
00319   typedef sse_uint64_t type;
00320   static const nat size = 2; };
00321 
00322 template<>
00323 struct simd_base_helper<sse_uint64_t> {
00324   typedef uint64_t type; };
00325 
00326 inline sse_uint64_t
00327 simd_set_duplicate (uint64_t x) {
00328   return sse_uint64_t(_mm_set1_epi64 ((__m64) x)); }
00329 
00330 inline sse_uint64_t
00331 simd_set (uint64_t x0, uint64_t x1) {
00332   return sse_uint64_t(_mm_set_epi64 ((__m64) x1, (__m64) x0)); }
00333 
00334 // Shifts
00335 inline sse_uint64_t
00336 simd_sll (const sse_uint64_t& x, int i) {
00337   return sse_uint64_t(_mm_slli_epi64 ((__m128i) x, i)); }
00338 
00339 inline sse_uint64_t
00340 simd_srl (const sse_uint64_t& x, int i) {
00341   return sse_uint64_t(_mm_srli_epi64 ((__m128i) x, i)); }
00342 
00343 // Printing and equalities
00344 SIMD_SUGAR (uint64_t, sse_uint64_t)
00345 
00346 /******************************************************************************
00347 * Vectors of four int32_t
00348 ******************************************************************************/
00349 
00350 typedef int32_t __attribute__((vector_size(16))) sse_int32_t;
00351 
00352 template<>
00353 struct simd_helper<int32_t> {
00354   typedef sse_int32_t type;
00355   static const nat size = 4; };
00356 
00357 template<>
00358 struct simd_base_helper<sse_int32_t> {
00359   typedef int32_t type; };
00360 
00361 inline sse_int32_t
00362 simd_set_duplicate (int32_t x) {
00363   return sse_int32_t(_mm_set1_epi32 (x)); }
00364 
00365 inline sse_int32_t
00366 simd_set (int32_t x0, int32_t x1, int32_t x2, int32_t x3) {
00367   return sse_int32_t(_mm_set_epi32 (x3, x2, x1, x0)); }
00368 
00369 // Comparisons
00370 inline sse_int32_t
00371 simd_equal (const sse_int32_t& x, const sse_int32_t& y) {
00372   return sse_int32_t(_mm_cmpeq_epi32 ((__m128i) x, (__m128i) y)); }
00373   
00374 inline sse_int32_t
00375 simd_less (const sse_int32_t& x, const sse_int32_t& y) {
00376   return sse_int32_t(_mm_cmplt_epi32 ((__m128i) x, (__m128i) y)); }
00377 
00378 inline sse_int32_t
00379 simd_gtr (const sse_int32_t& x, const sse_int32_t& y) {
00380   return sse_int32_t(_mm_cmpgt_epi32 ((__m128i) x, (__m128i) y)); }
00381 
00382 // Shifts
00383 inline sse_int32_t
00384 simd_sll (const sse_int32_t& x, int i) {
00385   return sse_int32_t(_mm_slli_epi32 ((__m128i) x, i)); }
00386 
00387 inline sse_int32_t
00388 simd_srl (const sse_int32_t& x, int i) {
00389   return sse_int32_t(_mm_srli_epi32 ((__m128i) x, i)); }
00390 
00391 inline sse_int32_t
00392 simd_sra (const sse_int32_t& x, int i) {
00393   return sse_int32_t(_mm_srai_epi32 ((__m128i) x, i)); }
00394 
00395 // Printing and equalities
00396 SIMD_SUGAR (int32_t, sse_int32_t)
00397 
00398 /******************************************************************************
00399 * Vectors of four uint32_t
00400 ******************************************************************************/
00401 
00402 typedef uint32_t __attribute__((vector_size(16))) sse_uint32_t;
00403 
00404 template<>
00405 struct simd_helper<uint32_t> {
00406   typedef sse_uint32_t type;
00407   static const nat size = 4; };
00408 
00409 template<>
00410 struct simd_base_helper<sse_uint32_t> {
00411   typedef uint32_t type; };
00412 
00413 inline sse_uint32_t
00414 simd_set_duplicate (uint32_t x) {
00415   return sse_uint32_t(_mm_set1_epi32 (x)); }
00416 
00417 inline sse_uint32_t
00418 simd_set (uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) {
00419   return sse_uint32_t(_mm_set_epi32 (x3, x2, x1, x0)); }
00420 
00421 // Comparisons
00422 inline sse_uint32_t
00423 simd_equal (const sse_uint32_t& x, const sse_uint32_t& y) {
00424   return sse_uint32_t(_mm_cmpeq_epi32 ((__m128i) x, (__m128i) y)); }
00425   
00426 // Shifts
00427 inline sse_uint32_t
00428 simd_sll (const sse_uint32_t& x, int i) {
00429   return sse_uint32_t(_mm_slli_epi32 ((__m128i) x, i)); }
00430 
00431 inline sse_uint32_t
00432 simd_srl (const sse_uint32_t& x, int i) {
00433   return sse_uint32_t(_mm_srli_epi32 ((__m128i) x, i)); }
00434 
00435 // Printing and equalities
00436 SIMD_SUGAR (uint32_t, sse_uint32_t)
00437 
00438 /******************************************************************************
00439 * Vectors of eight int16_t
00440 ******************************************************************************/
00441 
00442 typedef int16_t __attribute__((vector_size(16))) sse_int16_t;
00443 
00444 template<>
00445 struct simd_helper<int16_t> {
00446   typedef sse_int16_t type;
00447   static const nat size = 8; };
00448 
00449 template<>
00450 struct simd_base_helper<sse_int16_t> {
00451   typedef int16_t type; };
00452 
00453 inline sse_int16_t
00454 simd_set_duplicate (int16_t x) {
00455   return sse_int16_t(_mm_set1_epi16 (x)); }
00456 
00457 inline sse_int16_t
00458 simd_set (int16_t x0, int16_t x1, int16_t x2, int16_t x3,
00459           int16_t x4, int16_t x5, int16_t x6, int16_t x7) {
00460   return sse_int16_t(_mm_set_epi16 (x7, x6, x5, x4, x3, x2, x1, x0)); }
00461 
00462 // Comparisons
00463 inline sse_int16_t
00464 simd_equal (const sse_int16_t& x, const sse_int16_t& y) {
00465   return sse_int16_t(_mm_cmpeq_epi16 ((__m128i) x, (__m128i) y)); }
00466   
00467 inline sse_int16_t
00468 simd_less (const sse_int16_t& x, const sse_int16_t& y) {
00469   return sse_int16_t(_mm_cmplt_epi16 ((__m128i) x, (__m128i) y)); }
00470 
00471 inline sse_int16_t
00472 simd_gtr (const sse_int16_t& x, const sse_int16_t& y) {
00473   return sse_int16_t(_mm_cmpgt_epi16 ((__m128i) x, (__m128i) y)); }
00474 
00475 // Min, max
00476 inline sse_int16_t
00477 min (const sse_int16_t& x, const sse_int16_t& y) { 
00478   return sse_int16_t(_mm_min_epi16 ((__m128i) x, (__m128i) y)); }
00479 
00480 inline sse_int16_t
00481 max (const sse_int16_t& x, const sse_int16_t& y) {
00482   return sse_int16_t(_mm_max_epi16 ((__m128i) x, (__m128i) y)); }
00483 
00484 // Shifts
00485 inline sse_int16_t
00486 simd_sll (const sse_int16_t& x, int i) {
00487   return sse_int16_t(_mm_slli_epi16 ((__m128i) x, i)); }
00488 
00489 inline sse_int16_t
00490 simd_sra (const sse_int16_t& x, int i) {
00491   return sse_int16_t(_mm_srai_epi16 ((__m128i) x, i)); }
00492 
00493 inline sse_int16_t
00494 simd_srl (const sse_int16_t& x, int i) {
00495   return sse_int16_t(_mm_srli_epi16 ((__m128i) x, i)); }
00496 
00497 // Printing and equalities
00498 SIMD_SUGAR (int16_t, sse_int16_t)
00499 
00500 /******************************************************************************
00501 * Vectors of eight uint16_t
00502 ******************************************************************************/
00503 
00504 typedef uint16_t __attribute__((vector_size(16))) sse_uint16_t;
00505 
00506 template<>
00507 struct simd_helper<uint16_t> {
00508   typedef sse_uint16_t type;
00509   static const nat size = 8; };
00510 
00511 template<>
00512 struct simd_base_helper<sse_uint16_t> {
00513   typedef uint16_t type; };
00514 
00515 inline sse_uint16_t
00516 simd_set_duplicate (uint16_t x) {
00517   return sse_uint16_t(_mm_set1_epi16 ((short) x)); }
00518 
00519 inline sse_uint16_t
00520 simd_set (uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3,
00521           uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) {
00522   return sse_uint16_t(_mm_set_epi16 ((short) x7, (short) x6, (short) x5,
00523         (short) x4, (short) x3, (short) x2, (short)  x1, (short) x0)); }
00524 
00525 // Comparisons
00526 inline sse_uint16_t
00527 simd_equal (const sse_uint16_t& x, const sse_uint16_t& y) {
00528   return sse_uint16_t(_mm_cmpeq_epi16 ((__m128i) x, (__m128i) y)); }
00529   
00530 // Shifts
00531 inline sse_uint16_t
00532 simd_sll (const sse_uint16_t& x, int i) {
00533   return sse_uint16_t(_mm_slli_epi16 ((__m128i) x, i)); }
00534 
00535 inline sse_uint16_t
00536 simd_srl (const sse_uint16_t& x, int i) {
00537   return sse_uint16_t(_mm_srli_epi16 ((__m128i) x, i)); }
00538 
00539 // Printing and equalities
00540 SIMD_SUGAR (uint16_t, sse_uint16_t)
00541 
00542 /******************************************************************************
00543 * Vectors of sixteen int8_t
00544 ******************************************************************************/
00545 
00546 typedef int8_t __attribute__((vector_size(16))) sse_int8_t;
00547 
00548 template<>
00549 struct simd_helper<int8_t> {
00550   typedef sse_int8_t type;
00551   static const nat size = 16;
00552 };
00553 
00554 template<>
00555 struct simd_base_helper<sse_int8_t> {
00556   typedef int8_t type; };
00557 
00558 inline sse_int8_t
00559 simd_set_duplicate (int8_t x) {
00560   return sse_int8_t(_mm_set1_epi8 ((char) x)); }
00561 
00562 inline sse_int8_t 
00563 simd_set (int8_t x0, int8_t x1,  int8_t x2,  int8_t x3,
00564           int8_t x4, int8_t x5,  int8_t x6,  int8_t x7,
00565           int8_t x8, int8_t x9,  int8_t x10, int8_t x11,
00566           int8_t x12, int8_t x13, int8_t x14, int8_t x15) {
00567   return sse_int8_t(_mm_set_epi8 (x15, x14,  x13,  x12,  x11,  x10,  x9,  x8,
00568                        x7, x6, x5, x4, x3, x2, x1, x0)); }
00569 
00570 // Comparisons
00571 inline sse_int8_t
00572 simd_equal (const sse_int8_t& x, const sse_int8_t& y) {
00573   return sse_int8_t(_mm_cmpeq_epi8 ((__m128i) x, (__m128i) y)); }
00574   
00575 inline sse_int8_t
00576 simd_less (const sse_int8_t& x, const sse_int8_t& y) {
00577   return sse_int8_t(_mm_cmplt_epi8 ((__m128i) x, (__m128i) y)); }
00578 
00579 inline sse_int8_t
00580 simd_gtr (const sse_int8_t& x, const sse_int8_t& y) {
00581   return sse_int8_t(_mm_cmpgt_epi8 ((__m128i) x, (__m128i) y)); }
00582 
00583 // Printing and equalities
00584 SIMD_SUGAR (int8_t, sse_int8_t)
00585 
00586 /******************************************************************************
00587 * Vectors of sixteen uint8_t
00588 ******************************************************************************/
00589 
00590 typedef uint8_t __attribute__((vector_size(16))) sse_uint8_t;
00591 
00592 template<>
00593 struct simd_helper<uint8_t> {
00594   typedef sse_uint8_t type;
00595   static const nat size = 16;
00596 };
00597 
00598 template<>
00599 struct simd_base_helper<sse_uint8_t> {
00600   typedef uint8_t type; };
00601 
00602 inline sse_uint8_t
00603 simd_set_duplicate (uint8_t x) {
00604   return sse_uint8_t(_mm_set1_epi8 (x)); }
00605 
00606 inline sse_uint8_t 
00607 simd_set (uint8_t x0, uint8_t x1,  uint8_t x2,  uint8_t x3,
00608           uint8_t x4, uint8_t x5,  uint8_t x6,  uint8_t x7,
00609           uint8_t x8, uint8_t x9,  uint8_t x10, uint8_t x11,
00610           uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) {
00611   return sse_uint8_t(_mm_set_epi8 (x15, x14,  x13,  x12,  x11,  x10,  x9,  x8,
00612                        x7, x6, x5, x4, x3, x2, x1, x0)); }
00613 
00614 // Comparisons
00615 inline sse_uint8_t
00616 simd_equal (const sse_uint8_t& x, const sse_uint8_t& y) {
00617   return sse_uint8_t(_mm_cmpeq_epi8 ((__m128i) x, (__m128i) y)); }
00618   
00619 // Min, max
00620 inline sse_uint8_t
00621 min (const sse_uint8_t& x, const sse_uint8_t& y) { 
00622   return sse_uint8_t(_mm_min_epu8 ((__m128i) x, (__m128i) y)); }
00623 
00624 inline sse_uint8_t
00625 max (const sse_uint8_t& x, const sse_uint8_t& y) {
00626   return sse_uint8_t(_mm_max_epu8 ((__m128i) x, (__m128i) y)); }
00627 
00628 // Printing and equalities
00629 SIMD_SUGAR (uint8_t, sse_uint8_t)
00630 
00631 #undef SIMD_SUGAR
00632 } // namespace mmx
00633 
00634 #endif // NUMERIX_ENABLE_SIMD
00635 #endif // __MMX_SSE_HPP
 All Classes Namespaces Files Functions Variables Typedefs Friends Defines