numerix_doc 0.4
|
00001 00002 /****************************************************************************** 00003 * MODULE : sse.hpp 00004 * DESCRIPTION: Wrapper for SSE instructions 00005 * COPYRIGHT : (C) 2008 Joris van der Hoeven and Gregoire Lecerf 00006 ******************************************************************************* 00007 * This software falls under the GNU general public license and comes WITHOUT 00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00009 * If you don't have this file, write to the Free Software Foundation, Inc., 00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00011 ******************************************************************************/ 00012 00013 #ifndef __MMX_SSE_HPP 00014 #define __MMX_SSE_HPP 00015 #include <numerix/simd.hpp> 00016 00017 #if defined (NUMERIX_ENABLE_SIMD) && defined (__SSE2__) 00018 #include <stdint.h> 00019 #ifdef __SSE2__ 00020 #include <emmintrin.h> 00021 #endif 00022 #ifdef __SSE3__ 00023 #include <pmmintrin.h> 00024 #endif 00025 #ifdef __SSSE3__ 00026 #include <tmmintrin.h> 00027 #endif 00028 #ifdef __SSE4A__ 00029 #include <ammintrin.h> 00030 #endif 00031 00032 #include <basix/compound.hpp> 00033 #include <basix/identifiers.hpp> 00034 #include <basix/syntactic.hpp> 00035 #include <numerix/complex.hpp> 00036 00037 namespace mmx { 00038 00039 /****************************************************************************** 00040 * Generic vectorial routines 00041 ******************************************************************************/ 00042 00043 template<typename C> inline typename Simd_type (C) 00044 simd_load_aligned (const C* v) { 00045 return (typename Simd_type (C)) 00046 _mm_load_si128 ((const __m128i*) v); } 00047 00048 template<typename C> inline void 00049 simd_save_aligned (C* v, const typename Simd_type (C)& x) { 00050 _mm_store_si128 ((__m128i*) v, (const __m128i) x); } 00051 00052 template<typename C> inline void 00053 simd_save (C* v, const typename Simd_type(C)& x) { 00054 _mm_storeu_si128 ((__m128i*) v, (const __m128i) x); } 00055 00056 template<typename C> inline typename Simd_type (C) 00057 simd_load (const C* v0, const C* v1) { 00058 return simd_set (*v0, *v1); } 00059 00060 template<typename C> inline void 00061 simd_save (C* v0, C* v1, const typename Simd_type (C)& x) { 00062 static C v[Simd_size (C)]; // ensures alignment 00063 simd_save_aligned (v, x); 00064 *v0 = v[0]; *v1 = v[1]; } 00065 00066 template<typename C> inline typename Simd_type (C) 00067 simd_load (const C* v0, const C* v1, const C* v2, const C* v3) { 00068 return simd_set (*v0, *v1, *v2, *v3); } 00069 00070 template<typename C> inline void 00071 simd_save (C* v0, C* v1, C* v2, C* v3, const typename Simd_type (C)& x) { 00072 static C v[Simd_size (C)]; // ensures alignment 00073 simd_save_aligned (v, x); 00074 *v0 = v[0]; *v1 = v[1]; *v2 = v[2]; *v3 = v[3]; } 00075 00076 template<typename C> inline typename Simd_type (C) 00077 simd_load (const C* v0, const C* v1, const C* v2, const C* v3, 00078 const C* v4, const C* v5, const C* v6, const C* v7) { 00079 return simd_set (*v0, *v1, *v2, *v3, *v4, *v5, *v6, *v7); } 00080 00081 template<typename C> inline void 00082 simd_save (C* v0, C* v1, C* v2, C* v3, C* v4, C* v5, C* v6, C* v7, 00083 const typename Simd_type (C)& x) { 00084 static C v[Simd_size (C)]; // ensures alignment 00085 simd_save_aligned (v, x); 00086 *v0 = v[0]; *v1 = v[1]; *v2 = v[2]; *v3 = v[3]; 00087 *v4 = v[4]; *v5 = v[5]; *v6 = v[6]; *v7 = v[7]; } 00088 00089 template<typename C> inline typename Simd_type (C) 00090 simd_load (const C* v0, const C* v1, const C* v2, const C* v3, 00091 const C* v4, const C* v5, const C* v6, const C* v7, 00092 const C* v8, const C* v9, const C* v10, const C* v11, 00093 const C* v12, const C* v13, const C* v14, const C* v15) { 00094 return simd_set (*v0, *v1, *v2, *v3, *v4, *v5, *v6, *v7, 00095 *v8, *v9, *v10, *v11, *v12, *v13, *v14, *v15); } 00096 00097 template<typename C> inline void 00098 simd_save (C* v0, C* v1, C* v2, C* v3, C* v4, C* v5, C* v6, C* v7, 00099 C* v8, C* v9, C* v10, C* v11, C* v12, C* v13, C* v14, C* v15, 00100 const typename Simd_type (C)& x) { 00101 static C v[Simd_size (C)]; // ensures alignment 00102 simd_save_aligned (v, x); 00103 *v0 = v[0]; *v1 = v[1]; *v2 = v[2]; *v3 = v[3]; 00104 *v4 = v[4]; *v5 = v[5]; *v6 = v[6]; *v7 = v[7]; 00105 *v8 = v[8]; *v9 = v[9]; *v10 = v[10]; *v11 = v[11]; 00106 *v12 = v[12]; *v13 = v[13]; *v14 = v[14]; *v15 = v[15]; } 00107 00108 template<typename C> inline C 00109 simd_big_add (const typename Simd_type (C)& x) { 00110 C r = 0; 00111 for (nat i = 0; i < Simd_size (C); i++) 00112 r += ((C*) &x) [i]; 00113 return r; } 00114 00115 /****************************************************************************** 00116 * Vector light interface 00117 ******************************************************************************/ 00118 00119 // Note that vectorial +, -, *, /, unary minus, ^, |, &, ~ operations 00120 // are supposed to be furnished by the compiler 00121 00122 template<typename V> inline syntactic 00123 simd_flatten (const V& x) { 00124 typedef typename Simd_base_type(V) C; 00125 static const nat size = Simd_size(C); 00126 C* v = mmx_new<C> (size); 00127 simd_save_aligned (v, x); 00128 vector<syntactic> w = fill <syntactic> (size);; 00129 for (nat i = 0; i < size; i++) 00130 w[i] = flatten (v[i]); 00131 mmx_delete<C> (v, size); 00132 return apply (GEN_SQTUPLE, w); 00133 } 00134 00135 #define SIMD_SUGAR(C,V) \ 00136 inline syntactic flatten (const V& x) { \ 00137 return simd_flatten (x); } \ 00138 inline bool equal (const V& x, const V& y) { \ 00139 return _mm_movemask_epi8 ( \ 00140 (__m128i) _mm_cmpeq_epi32 ((const __m128i) x, \ 00141 (const __m128i) y)) == 131071; } \ 00142 inline bool unequal (const V& x, const V& y) { \ 00143 return ! equal (x, y); } \ 00144 STMPL inline void clear (V& x) { \ 00145 x = simd_set_duplicate (C (0)); } \ 00146 STMPL inline void mul (V& x, const V& y1, const C& y2) { \ 00147 x = y1 * simd_set_duplicate (y2); } \ 00148 STMPL inline void mul_add (V& x, const V& y1, const C& y2) { \ 00149 x += y1 * simd_set_duplicate (y2); } 00150 00151 /****************************************************************************** 00152 * Vectors of two doubles 00153 ******************************************************************************/ 00154 00155 typedef double __attribute__((vector_size(16))) sse_double; 00156 00157 template<> 00158 struct simd_helper<double> { 00159 typedef sse_double type; 00160 static const nat size = 2; }; 00161 00162 template<> 00163 struct simd_base_helper<sse_double> { 00164 typedef double type; }; 00165 00166 inline sse_double 00167 simd_load (const double* v) { 00168 return _mm_loadu_pd (v); } 00169 00170 inline sse_double 00171 simd_set_duplicate (double x) { 00172 return _mm_set1_pd (x); } 00173 00174 inline sse_double 00175 simd_set (double v0, double v1) { 00176 return _mm_set_pd (v1, v0); } 00177 00178 #ifdef __SSE3__ 00179 STMPL inline double 00180 simd_big_add (const sse_double& x) { 00181 double r; 00182 sse_double y = _mm_hadd_pd (x, simd_set_duplicate((double) 0)); 00183 _mm_storel_pd (&r, y); 00184 return r; 00185 } 00186 #endif 00187 00188 // Comparisons 00189 inline sse_double 00190 simd_equal (const sse_double& x, const sse_double& y) { 00191 return _mm_cmpeq_pd (x, y); } 00192 00193 inline sse_double 00194 simd_unequal (const sse_double& x, const sse_double& y) { 00195 return _mm_cmpneq_pd (x, y); } 00196 00197 inline sse_double 00198 simd_less (const sse_double& x, const sse_double& y) { 00199 return _mm_cmplt_pd (x, y); } 00200 00201 inline sse_double 00202 simd_gtr (const sse_double& x, const sse_double& y) { 00203 return _mm_cmpgt_pd (x, y); } 00204 00205 inline sse_double 00206 simd_lesseq (const sse_double& x, const sse_double& y) { 00207 return _mm_cmple_pd (x, y); } 00208 00209 inline sse_double 00210 simd_gtreq (const sse_double& x, const sse_double& y) { 00211 return _mm_cmpge_pd (x, y); } 00212 00213 // Min, max 00214 inline sse_double 00215 min (const sse_double& x, const sse_double& y) { 00216 return _mm_min_pd (x, y); } 00217 00218 inline sse_double 00219 max (const sse_double& x, const sse_double& y) { 00220 return _mm_max_pd (x, y); } 00221 00222 inline sse_double 00223 simd_shuffle (const sse_double& x, const sse_double& y, int i) { 00224 return _mm_shuffle_pd (x, y, i); } 00225 00226 // Specific 00227 inline sse_double 00228 simd_load_duplicate (const double* v) { 00229 return _mm_load1_pd (v); } 00230 00231 inline sse_double 00232 simd_load (const double* v0, const double* v1) { 00233 return _mm_loadh_pd (_mm_load1_pd (v0), v1); } 00234 00235 inline void 00236 simd_save (double* v0, double* v1, const sse_double& x) { 00237 _mm_storel_pd (v0, x); _mm_storeh_pd (v1, x); } 00238 00239 inline sse_double 00240 simd_swap (const sse_double& x) { 00241 return _mm_shuffle_pd (x, x, 1); } 00242 00243 // Printing and equalities 00244 SIMD_SUGAR (double, sse_double) 00245 00246 /****************************************************************************** 00247 * Vectors of two complexified doubles 00248 ******************************************************************************/ 00249 00250 typedef complex< double> complex_double; 00251 typedef complex<sse_double> sse_complex_double; 00252 00253 template<> 00254 struct simd_helper<complex_double> { 00255 typedef sse_complex_double type; 00256 static const nat size = 2; }; 00257 00258 inline sse_complex_double 00259 simd_set_duplicate (const complex_double& z) { 00260 return sse_complex_double (simd_set_duplicate (Re (z)), 00261 simd_set_duplicate (Im (z))); } 00262 inline sse_complex_double 00263 simd_set (const complex_double& z0, const complex_double& z1) { 00264 return sse_complex_double (simd_set (Re (z0), Re (z1)), 00265 simd_set (Im (z0), Im (z1))); } 00266 inline sse_complex_double 00267 simd_load_duplicate (const complex_double* v) { 00268 const double* w= (double*) ((void*) v); 00269 return sse_complex_double (simd_load_duplicate (w), 00270 simd_load_duplicate (w + 1)); } 00271 00272 template<> inline syntactic 00273 flatten (const sse_complex_double& z) { 00274 return flatten (Re (z)) + flatten (Im (z)) * Imaginary (syntactic); } 00275 00276 /****************************************************************************** 00277 * Vectors of int64_t 00278 ******************************************************************************/ 00279 00280 typedef int64_t __attribute__((vector_size(16))) sse_int64_t; 00281 00282 template<> 00283 struct simd_helper<int64_t> { 00284 typedef sse_int64_t type; 00285 static const nat size = 2; }; 00286 00287 template<> 00288 struct simd_base_helper<sse_int64_t> { 00289 typedef int64_t type; }; 00290 00291 inline sse_int64_t 00292 simd_set_duplicate (int64_t x) { 00293 return sse_int64_t(_mm_set1_epi64 ((__m64) x)); } 00294 00295 inline sse_int64_t 00296 simd_set (int64_t x0, int64_t x1) { 00297 return sse_int64_t(_mm_set_epi64 ((__m64) x1, (__m64) x0)); } 00298 00299 // Shifts 00300 inline sse_int64_t 00301 simd_sll (const sse_int64_t& x, int i) { 00302 return sse_int64_t(_mm_slli_epi64 ((__m128i) x, i)); } 00303 00304 inline sse_int64_t 00305 simd_srl (const sse_int64_t& x, int i) { 00306 return sse_int64_t(_mm_srli_epi64 ((__m128i) x, i)); } 00307 00308 // Printing and equalities 00309 SIMD_SUGAR (int64_t, sse_int64_t) 00310 00311 /****************************************************************************** 00312 * Vectors of uint64_t 00313 ******************************************************************************/ 00314 00315 typedef uint64_t __attribute__((vector_size(16))) sse_uint64_t; 00316 00317 template<> 00318 struct simd_helper<uint64_t> { 00319 typedef sse_uint64_t type; 00320 static const nat size = 2; }; 00321 00322 template<> 00323 struct simd_base_helper<sse_uint64_t> { 00324 typedef uint64_t type; }; 00325 00326 inline sse_uint64_t 00327 simd_set_duplicate (uint64_t x) { 00328 return sse_uint64_t(_mm_set1_epi64 ((__m64) x)); } 00329 00330 inline sse_uint64_t 00331 simd_set (uint64_t x0, uint64_t x1) { 00332 return sse_uint64_t(_mm_set_epi64 ((__m64) x1, (__m64) x0)); } 00333 00334 // Shifts 00335 inline sse_uint64_t 00336 simd_sll (const sse_uint64_t& x, int i) { 00337 return sse_uint64_t(_mm_slli_epi64 ((__m128i) x, i)); } 00338 00339 inline sse_uint64_t 00340 simd_srl (const sse_uint64_t& x, int i) { 00341 return sse_uint64_t(_mm_srli_epi64 ((__m128i) x, i)); } 00342 00343 // Printing and equalities 00344 SIMD_SUGAR (uint64_t, sse_uint64_t) 00345 00346 /****************************************************************************** 00347 * Vectors of int32_t 00348 ******************************************************************************/ 00349 00350 typedef int32_t __attribute__((vector_size(16))) sse_int32_t; 00351 00352 template<> 00353 struct simd_helper<int32_t> { 00354 typedef sse_int32_t type; 00355 static const nat size = 4; }; 00356 00357 template<> 00358 struct simd_base_helper<sse_int32_t> { 00359 typedef int32_t type; }; 00360 00361 inline sse_int32_t 00362 simd_set_duplicate (int32_t x) { 00363 return sse_int32_t(_mm_set1_epi32 (x)); } 00364 00365 inline sse_int32_t 00366 simd_set (int32_t x0, int32_t x1, int32_t x2, int32_t x3) { 00367 return sse_int32_t(_mm_set_epi32 (x3, x2, x1, x0)); } 00368 00369 // Comparisons 00370 inline sse_int32_t 00371 simd_equal (const sse_int32_t& x, const sse_int32_t& y) { 00372 return sse_int32_t(_mm_cmpeq_epi32 ((__m128i) x, (__m128i) y)); } 00373 00374 inline sse_int32_t 00375 simd_less (const sse_int32_t& x, const sse_int32_t& y) { 00376 return sse_int32_t(_mm_cmplt_epi32 ((__m128i) x, (__m128i) y)); } 00377 00378 inline sse_int32_t 00379 simd_gtr (const sse_int32_t& x, const sse_int32_t& y) { 00380 return sse_int32_t(_mm_cmpgt_epi32 ((__m128i) x, (__m128i) y)); } 00381 00382 // Shifts 00383 inline sse_int32_t 00384 simd_sll (const sse_int32_t& x, int i) { 00385 return sse_int32_t(_mm_slli_epi32 ((__m128i) x, i)); } 00386 00387 inline sse_int32_t 00388 simd_srl (const sse_int32_t& x, int i) { 00389 return sse_int32_t(_mm_srli_epi32 ((__m128i) x, i)); } 00390 00391 inline sse_int32_t 00392 simd_sra (const sse_int32_t& x, int i) { 00393 return sse_int32_t(_mm_srai_epi32 ((__m128i) x, i)); } 00394 00395 // Printing and equalities 00396 SIMD_SUGAR (int32_t, sse_int32_t) 00397 00398 /****************************************************************************** 00399 * Vectors of uint32_t 00400 ******************************************************************************/ 00401 00402 typedef uint32_t __attribute__((vector_size(16))) sse_uint32_t; 00403 00404 template<> 00405 struct simd_helper<uint32_t> { 00406 typedef sse_uint32_t type; 00407 static const nat size = 4; }; 00408 00409 template<> 00410 struct simd_base_helper<sse_uint32_t> { 00411 typedef uint32_t type; }; 00412 00413 inline sse_uint32_t 00414 simd_set_duplicate (uint32_t x) { 00415 return sse_uint32_t(_mm_set1_epi32 (x)); } 00416 00417 inline sse_uint32_t 00418 simd_set (uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) { 00419 return sse_uint32_t(_mm_set_epi32 (x3, x2, x1, x0)); } 00420 00421 // Comparisons 00422 inline sse_uint32_t 00423 simd_equal (const sse_uint32_t& x, const sse_uint32_t& y) { 00424 return sse_uint32_t(_mm_cmpeq_epi32 ((__m128i) x, (__m128i) y)); } 00425 00426 // Shifts 00427 inline sse_uint32_t 00428 simd_sll (const sse_uint32_t& x, int i) { 00429 return sse_uint32_t(_mm_slli_epi32 ((__m128i) x, i)); } 00430 00431 inline sse_uint32_t 00432 simd_srl (const sse_uint32_t& x, int i) { 00433 return sse_uint32_t(_mm_srli_epi32 ((__m128i) x, i)); } 00434 00435 // Printing and equalities 00436 SIMD_SUGAR (uint32_t, sse_uint32_t) 00437 00438 /****************************************************************************** 00439 * Vectors of eight int16_t 00440 ******************************************************************************/ 00441 00442 typedef int16_t __attribute__((vector_size(16))) sse_int16_t; 00443 00444 template<> 00445 struct simd_helper<int16_t> { 00446 typedef sse_int16_t type; 00447 static const nat size = 8; }; 00448 00449 template<> 00450 struct simd_base_helper<sse_int16_t> { 00451 typedef int16_t type; }; 00452 00453 inline sse_int16_t 00454 simd_set_duplicate (int16_t x) { 00455 return sse_int16_t(_mm_set1_epi16 (x)); } 00456 00457 inline sse_int16_t 00458 simd_set (int16_t x0, int16_t x1, int16_t x2, int16_t x3, 00459 int16_t x4, int16_t x5, int16_t x6, int16_t x7) { 00460 return sse_int16_t(_mm_set_epi16 (x7, x6, x5, x4, x3, x2, x1, x0)); } 00461 00462 // Comparisons 00463 inline sse_int16_t 00464 simd_equal (const sse_int16_t& x, const sse_int16_t& y) { 00465 return sse_int16_t(_mm_cmpeq_epi16 ((__m128i) x, (__m128i) y)); } 00466 00467 inline sse_int16_t 00468 simd_less (const sse_int16_t& x, const sse_int16_t& y) { 00469 return sse_int16_t(_mm_cmplt_epi16 ((__m128i) x, (__m128i) y)); } 00470 00471 inline sse_int16_t 00472 simd_gtr (const sse_int16_t& x, const sse_int16_t& y) { 00473 return sse_int16_t(_mm_cmpgt_epi16 ((__m128i) x, (__m128i) y)); } 00474 00475 // Min, max 00476 inline sse_int16_t 00477 min (const sse_int16_t& x, const sse_int16_t& y) { 00478 return sse_int16_t(_mm_min_epi16 ((__m128i) x, (__m128i) y)); } 00479 00480 inline sse_int16_t 00481 max (const sse_int16_t& x, const sse_int16_t& y) { 00482 return sse_int16_t(_mm_max_epi16 ((__m128i) x, (__m128i) y)); } 00483 00484 // Shifts 00485 inline sse_int16_t 00486 simd_sll (const sse_int16_t& x, int i) { 00487 return sse_int16_t(_mm_slli_epi16 ((__m128i) x, i)); } 00488 00489 inline sse_int16_t 00490 simd_sra (const sse_int16_t& x, int i) { 00491 return sse_int16_t(_mm_srai_epi16 ((__m128i) x, i)); } 00492 00493 inline sse_int16_t 00494 simd_srl (const sse_int16_t& x, int i) { 00495 return sse_int16_t(_mm_srli_epi16 ((__m128i) x, i)); } 00496 00497 // Printing and equalities 00498 SIMD_SUGAR (int16_t, sse_int16_t) 00499 00500 /****************************************************************************** 00501 * Vectors of eight uint16_t 00502 ******************************************************************************/ 00503 00504 typedef uint16_t __attribute__((vector_size(16))) sse_uint16_t; 00505 00506 template<> 00507 struct simd_helper<uint16_t> { 00508 typedef sse_uint16_t type; 00509 static const nat size = 8; }; 00510 00511 template<> 00512 struct simd_base_helper<sse_uint16_t> { 00513 typedef uint16_t type; }; 00514 00515 inline sse_uint16_t 00516 simd_set_duplicate (uint16_t x) { 00517 return sse_uint16_t(_mm_set1_epi16 ((short) x)); } 00518 00519 inline sse_uint16_t 00520 simd_set (uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, 00521 uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) { 00522 return sse_uint16_t(_mm_set_epi16 ((short) x7, (short) x6, (short) x5, 00523 (short) x4, (short) x3, (short) x2, (short) x1, (short) x0)); } 00524 00525 // Comparisons 00526 inline sse_uint16_t 00527 simd_equal (const sse_uint16_t& x, const sse_uint16_t& y) { 00528 return sse_uint16_t(_mm_cmpeq_epi16 ((__m128i) x, (__m128i) y)); } 00529 00530 // Shifts 00531 inline sse_uint16_t 00532 simd_sll (const sse_uint16_t& x, int i) { 00533 return sse_uint16_t(_mm_slli_epi16 ((__m128i) x, i)); } 00534 00535 inline sse_uint16_t 00536 simd_srl (const sse_uint16_t& x, int i) { 00537 return sse_uint16_t(_mm_srli_epi16 ((__m128i) x, i)); } 00538 00539 // Printing and equalities 00540 SIMD_SUGAR (uint16_t, sse_uint16_t) 00541 00542 /****************************************************************************** 00543 * Vectors of sixteen int8_t 00544 ******************************************************************************/ 00545 00546 typedef int8_t __attribute__((vector_size(16))) sse_int8_t; 00547 00548 template<> 00549 struct simd_helper<int8_t> { 00550 typedef sse_int8_t type; 00551 static const nat size = 16; 00552 }; 00553 00554 template<> 00555 struct simd_base_helper<sse_int8_t> { 00556 typedef int8_t type; }; 00557 00558 inline sse_int8_t 00559 simd_set_duplicate (int8_t x) { 00560 return sse_int8_t(_mm_set1_epi8 ((char) x)); } 00561 00562 inline sse_int8_t 00563 simd_set (int8_t x0, int8_t x1, int8_t x2, int8_t x3, 00564 int8_t x4, int8_t x5, int8_t x6, int8_t x7, 00565 int8_t x8, int8_t x9, int8_t x10, int8_t x11, 00566 int8_t x12, int8_t x13, int8_t x14, int8_t x15) { 00567 return sse_int8_t(_mm_set_epi8 (x15, x14, x13, x12, x11, x10, x9, x8, 00568 x7, x6, x5, x4, x3, x2, x1, x0)); } 00569 00570 // Comparisons 00571 inline sse_int8_t 00572 simd_equal (const sse_int8_t& x, const sse_int8_t& y) { 00573 return sse_int8_t(_mm_cmpeq_epi8 ((__m128i) x, (__m128i) y)); } 00574 00575 inline sse_int8_t 00576 simd_less (const sse_int8_t& x, const sse_int8_t& y) { 00577 return sse_int8_t(_mm_cmplt_epi8 ((__m128i) x, (__m128i) y)); } 00578 00579 inline sse_int8_t 00580 simd_gtr (const sse_int8_t& x, const sse_int8_t& y) { 00581 return sse_int8_t(_mm_cmpgt_epi8 ((__m128i) x, (__m128i) y)); } 00582 00583 // Printing and equalities 00584 SIMD_SUGAR (int8_t, sse_int8_t) 00585 00586 /****************************************************************************** 00587 * Vectors of sixteen uint8_t 00588 ******************************************************************************/ 00589 00590 typedef uint8_t __attribute__((vector_size(16))) sse_uint8_t; 00591 00592 template<> 00593 struct simd_helper<uint8_t> { 00594 typedef sse_uint8_t type; 00595 static const nat size = 16; 00596 }; 00597 00598 template<> 00599 struct simd_base_helper<sse_uint8_t> { 00600 typedef uint8_t type; }; 00601 00602 inline sse_uint8_t 00603 simd_set_duplicate (uint8_t x) { 00604 return sse_uint8_t(_mm_set1_epi8 (x)); } 00605 00606 inline sse_uint8_t 00607 simd_set (uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, 00608 uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, 00609 uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, 00610 uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) { 00611 return sse_uint8_t(_mm_set_epi8 (x15, x14, x13, x12, x11, x10, x9, x8, 00612 x7, x6, x5, x4, x3, x2, x1, x0)); } 00613 00614 // Comparisons 00615 inline sse_uint8_t 00616 simd_equal (const sse_uint8_t& x, const sse_uint8_t& y) { 00617 return sse_uint8_t(_mm_cmpeq_epi8 ((__m128i) x, (__m128i) y)); } 00618 00619 // Min, max 00620 inline sse_uint8_t 00621 min (const sse_uint8_t& x, const sse_uint8_t& y) { 00622 return sse_uint8_t(_mm_min_epu8 ((__m128i) x, (__m128i) y)); } 00623 00624 inline sse_uint8_t 00625 max (const sse_uint8_t& x, const sse_uint8_t& y) { 00626 return sse_uint8_t(_mm_max_epu8 ((__m128i) x, (__m128i) y)); } 00627 00628 // Printing and equalities 00629 SIMD_SUGAR (uint8_t, sse_uint8_t) 00630 00631 #undef SIMD_SUGAR 00632 } // namespace mmx 00633 00634 #endif // NUMERIX_ENABLE_SIMD 00635 #endif // __MMX_SSE_HPP